Merge release-5-0 into master
author Roland Schulz <roland@utk.edu>
Thu, 4 Sep 2014 16:48:52 +0000 (12:48 -0400)
committer Roland Schulz <roland@utk.edu>
Fri, 5 Sep 2014 08:11:45 +0000 (04:11 -0400)
Conflicts:
CMakeLists.txt
trivial
share/template/cmake/FindGROMACS.cmakein
deleted in master
src/gromacs/utility/gmx_header_config_gen.h.cmakein
applied to src/gromacs/utility/gmx_header_config.h
src/gromacs/commandline/pargs.cpp
applied to src/gromacs/commandline/cmdlinemodulemanager.cpp
src/gromacs/gmxlib/gmx_thread_affinity.c
trivial
src/gromacs/gmxlib/main.cpp
applied to src/gromacs/utility/basenetwork.cpp
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/*.c
Merged the template and rerun the generator
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre
trivial
src/gromacs/utility/futil.cpp
change unnecessary (function deleted)
Other changes:
src/gromacs/fileio/vmdio.c
removed duplicate config.h

Change-Id: Ib4237944773e41d7b52e8c0ee181da717d2b26f3

85 files changed:
1  2 
CMakeLists.txt
cmake/gmxCFlags.cmake
cmake/gmxDetectSimd.cmake
docs/install-guide/install-guide.md
src/config.h.cmakein
src/gromacs/commandline/cmdlinemodulemanager.cpp
src/gromacs/fileio/tngio.cpp
src/gromacs/fileio/vmdio.c
src/gromacs/gmxlib/checkpoint.cpp
src/gromacs/gmxlib/gmx_cpuid.c
src/gromacs/gmxlib/gmx_thread_affinity.c
src/gromacs/gmxlib/nonbonded/nb_free_energy.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3W3_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4W4_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre
src/gromacs/gmxpreprocess/readir.c
src/gromacs/gmxpreprocess/toputil.c
src/gromacs/simd/simd.h
src/gromacs/simd/tests/simd.cpp
src/gromacs/simd/tests/simd4.cpp
src/gromacs/simd/tests/simd4_floatingpoint.cpp
src/gromacs/simd/tests/simd_floatingpoint.cpp
src/gromacs/utility/basenetwork.cpp
src/gromacs/utility/cstringutil.c
src/gromacs/utility/futil.cpp
src/gromacs/utility/gmx_header_config.h
src/gromacs/utility/gmxomp.h
src/gromacs/utility/smalloc.c
src/testutils/integrationtests.cpp

diff --combined CMakeLists.txt
index 5aafe926ada829b58ebc622faf85d92c0458ffe4,e87e6464acda800c6a2868b9524754245ddd7568..66144b8960685bc42ba4e114a9d0ed5ba9ff0459
@@@ -48,28 -48,75 +48,28 @@@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CM
  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
  
 -# PROJECT_VERSION should have the following structure:
 -# VERSION-dev[-SUFFIX] where the VERSION should have the for: vMajor.vMinor.vPatch
 -#
 -# The "-dev" suffix is important to keep because it makes possible to distinguish
 -# between a build from official release and a build from git release branch on a
 -# machine with no git.
 -#
 -# NOTE: when releasing the "-dev" suffix needs to be stripped off!
 -# REGRESSIONTEST_VERSION and REGRESSIONTEST_BRANCH should always be
 -# defined.
 -set(PROJECT_VERSION "5.0.1-dev")
 -# If this is a released tarball, "-dev" will not be present in
 -# PROJECT_VERSION, and REGRESSIONTEST_VERSION specifies the version
 -# number of the regressiontest tarball against which the code tarball
 -# can be tested. This will be the version of the last patch release.
 -set(REGRESSIONTEST_VERSION "5.0.1-dev")
 -# The MD5 checksum of the regressiontest tarball. Only used if "-dev"
 -# is not present in the PROJECT_VERSION
 -set(REGRESSIONTEST_MD5SUM "a07524afebca5013540d4f2f72df2dce")
 -# If this is not a released tarball, "-dev" will be present in
 -# PROJECT_VERSION, and REGRESSIONTEST_BRANCH specifies the name of the
 -# gerrit.gromacs.org branch whose HEAD can test this code, *if* this
 -# code contains all recent fixes from the corresponding code branch.
 -set(REGRESSIONTEST_BRANCH "refs/heads/release-5-0")
 -
 -set(CUSTOM_VERSION_STRING ""
 -    CACHE STRING "Custom version string (if empty, use hard-coded default)")
 -mark_as_advanced(CUSTOM_VERSION_STRING)
 -if (CUSTOM_VERSION_STRING)
 -    set(PROJECT_VERSION ${CUSTOM_VERSION_STRING})
 -endif()
 -set(LIBRARY_SOVERSION 0)
 -set(LIBRARY_VERSION ${LIBRARY_SOVERSION}.0.0)
 -# It is a bit irritating, but this has to be set separately for now!
 -SET(CPACK_PACKAGE_VERSION_MAJOR "5")
 -SET(CPACK_PACKAGE_VERSION_MINOR "0")
 -#SET(CPACK_PACKAGE_VERSION_PATCH "0")
 -
 -# The numerical gromacs version. It is 40600 for 4.6.0.
 -# The #define GMX_VERSION in gromacs/version.h is set to this value.
 -math(EXPR NUM_VERSION
 -    "${CPACK_PACKAGE_VERSION_MAJOR}*10000 + ${CPACK_PACKAGE_VERSION_MINOR}*100")
 -if(CPACK_PACKAGE_VERSION_PATCH)
 -    math(EXPR NUM_VERSION
 -         "${NUM_VERSION} + ${CPACK_PACKAGE_VERSION_PATCH}")
 -endif()
 -
 -# The API version tracks the numerical Gromacs version (for now).
 -# It is potentially different from the Gromacs version in the future, if
 -# the programs/libraries diverge from the presumably more stable API.
 -# The #define GMX_API_VERSION in version.h is set to this value to
 -# provide backward compatibility of software written against the Gromacs API.
 -set(API_VERSION ${NUM_VERSION})
 +# Set up common version variables, as well as general information about
 +# the build tree (whether the build is from a source package or from a git
 +# repository).  Also declares a few functions that will be used for generating
 +# version info files later.
 +include(gmxVersionInfo)
  
  if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND UNIX)
      set(CMAKE_INSTALL_PREFIX "/usr/local/gromacs" CACHE STRING "Installation prefix (installation will need write permissions here)" FORCE)
  endif()
  
  include(gmxBuildTypeReference)
 +include(gmxBuildTypeProfile)
  include(gmxBuildTypeTSAN)
  include(gmxBuildTypeASAN)
  include(gmxBuildTypeReleaseWithAssert)
  
  if(NOT CMAKE_BUILD_TYPE)
 -    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference RelWithAssert." FORCE)
 +    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference RelWithAssert Profile." FORCE)
      # There's no need to offer a user the choice of ThreadSanitizer
      # Set the possible values of build type for cmake-gui
      set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
 -        "MinSizeRel" "RelWithDebInfo" "Reference" "RelWithAssert")
 +        "MinSizeRel" "RelWithDebInfo" "Reference" "RelWithAssert" "Profile")
  endif()
  if(CMAKE_CONFIGURATION_TYPES)
      # Add appropriate GROMACS-specific build types for the Visual
          "List of configuration types"
          FORCE)
  endif()
 -set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL)
 +set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL PROFILE)
  
 -enable_language(C)
 -enable_language(CXX)
+ set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON)
 -
  set(CPACK_PACKAGE_NAME "gromacs")
 -set(CPACK_PACKAGE_VERSION ${PROJECT_VERSION})
 +set(CPACK_PACKAGE_VERSION_MAJOR ${GMX_VERSION_MAJOR})
 +set(CPACK_PACKAGE_VERSION_MINOR ${GMX_VERSION_MINOR})
 +set(CPACK_PACKAGE_VERSION_PATCH ${GMX_VERSION_PATCH})
 +set(CPACK_PACKAGE_VERSION       ${GMX_VERSION_STRING})
  set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}")
  set(CPACK_PACKAGE_VENDOR "gromacs.org")
  set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Gromacs - a toolkit for high-performance molecular simulation")
@@@ -117,9 -165,14 +118,9 @@@ set(MEMORYCHECK_SUPPRESSIONS_FIL
      "File that contains suppressions for the memory checker")
  include(CTest)
  
 -set(SOURCE_IS_GIT_REPOSITORY OFF)
 -set(SOURCE_IS_SOURCE_DISTRIBUTION OFF)
 -if(EXISTS "${CMAKE_SOURCE_DIR}/.git")
 -    set(SOURCE_IS_GIT_REPOSITORY ON)
 -endif()
 -if(NOT EXISTS "${CMAKE_SOURCE_DIR}/admin/.isreposource")
 -    set(SOURCE_IS_SOURCE_DISTRIBUTION ON)
 -endif()
 +# Variables that accumulate stuff influencing the installed headers
 +set(INSTALLED_HEADER_INCLUDE_DIRS "")
 +set(INSTALLED_HEADER_DEFINITIONS "")
  
  ########################################################################
  # Check and warn if cache generated on a different host is being reused
@@@ -138,6 -191,16 +139,16 @@@ if(CMAKE_HOST_UNIX
              "Hostname of the machine where the cache was generated.")
  endif()
  
+ ########################################################################
+ # Detect architecture before setting options so we can alter defaults
+ ########################################################################
+ # Detect the architecture the compiler is targeting, detect
+ # SIMD instructions possibilities on that hardware, suggest SIMD instruction set
+ # to use if none is specified, and populate the cache option for CPU
+ # SIMD.
+ include(gmxDetectTargetArchitecture)
+ gmx_detect_target_architecture()
  ########################################################################
  # User input options                                                   #
  ########################################################################
@@@ -145,7 -208,16 +156,16 @@@ include(gmxOptionUtilities
  
  set(CMAKE_PREFIX_PATH "" CACHE STRING "Extra locations to search for external libraries and tools (give directory without lib, bin, or include)")
  
- option(GMX_DOUBLE "Use double precision (much slower, use only if you really need it)" OFF)
+ if(GMX_TARGET_FUJITSU_SPARC64)
+     # Fujitsu only has SIMD in double precision, so this will be faster
+     set(GMX_DOUBLE_DEFAULT ON)
+ else()
+     set(GMX_DOUBLE_DEFAULT OFF)
+ endif()
+ option(GMX_DOUBLE "Use double precision (much slower, use only if you really need it)" ${GMX_DOUBLE_DEFAULT})
+ option(GMX_RELAXED_DOUBLE_PRECISION "Accept single precision 1/sqrt(x) when using Fujitsu HPC-ACE SIMD" OFF)
+ mark_as_advanced(GMX_RELAXED_DOUBLE_PRECISION)
  option(GMX_MPI    "Build a parallel (message-passing) version of GROMACS" OFF)
  option(GMX_THREAD_MPI  "Build a thread-MPI-based multithreaded version of GROMACS (not compatible with MPI)" ON)
  gmx_dependent_option(
@@@ -173,13 -245,6 +193,6 @@@ endif(
  set(REQUIRED_CUDA_COMPUTE_CAPABILITY 2.0)
  include(gmxManageGPU)
  
- # Detect the architecture the compiler is targetting, detect
- # SIMD instructions possibilities on that hardware, suggest SIMD instruction set
- # to use if none is specified, and populate the cache option for CPU
- # SIMD.
- include(gmxDetectTargetArchitecture)
- gmx_detect_target_architecture()
  if(GMX_CPU_ACCELERATION)
      # Stay compatible with old Jenkins command line options for specific SIMD acceleration
      set(GMX_SIMD "${GMX_CPU_ACCELERATION}" CACHE STRING "SIMD instruction set level and compiler optimization" FORCE)
@@@ -239,6 -304,9 +252,6 @@@ option(GMX_OPENMP "Enable OpenMP-based 
  
  option(GMX_USE_TNG "Use the TNG library for trajectory I/O" ON)
  
 -option(GMX_GIT_VERSION_INFO "Generate git version information" ${SOURCE_IS_GIT_REPOSITORY})
 -mark_as_advanced(GMX_GIT_VERSION_INFO)
 -
  if(UNIX)
      option(GMX_SYMLINK_OLD_BINARY_NAMES "Create symbolic links for pre-5.0 binary names" ON)
  endif()
@@@ -312,24 -380,22 +325,20 @@@ if(GMX_SIMD STREQUAL "AVX_256
  endif()
  
  
 -
 -set(PKG_CFLAGS "")
  if(GMX_DOUBLE)
      add_definitions(-DGMX_DOUBLE)
 -    set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_DOUBLE")
 +    list(APPEND INSTALLED_HEADER_DEFINITIONS "-DGMX_DOUBLE")
+     if(GMX_RELAXED_DOUBLE_PRECISION)
+         add_definitions(-DGMX_RELAXED_DOUBLE_PRECISION)
+     endif()
  endif()
  if(GMX_SOFTWARE_INVSQRT)
 -  set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_SOFTWARE_INVSQRT")
 +    list(APPEND INSTALLED_HEADER_DEFINITIONS "-DGMX_SOFTWARE_INVSQRT")
  endif()
  
  if(WIN32 AND NOT CYGWIN)
-     set(GMX_WSOCKLIB_PATH CACHE PATH "Path to winsock (wsock32.lib) library.")
-     mark_as_advanced(GMX_WSOCKLIB_PATH)
-     find_library(WSOCK32_LIBRARY NAMES wsock32 PATHS ${GMX_WSOCKLIB_PATH})
-     if(WSOCK32_LIBRARY)
-         list(APPEND GMX_EXTRA_LIBRARIES ${WSOCK32_LIBRARY})
-         add_definitions(-DGMX_HAVE_WINSOCK)
-     else()
-         message(STATUS "No winsock found. Cannot use interactive molecular dynamics (IMD).")
-     endif(WSOCK32_LIBRARY)
+     list(APPEND GMX_EXTRA_LIBRARIES "wsock32")
+     add_definitions(-DGMX_HAVE_WINSOCK)
  endif()
  
  
@@@ -388,6 -454,10 +397,10 @@@ if(${CMAKE_SYSTEM_NAME} MATCHES BlueGen
      include(gmxManageBlueGene)
  endif()
  
+ if(GMX_TARGET_FUJITSU_SPARC64)
+     include(gmxManageFujitsuSparc64)
+ endif()
  ########################################################################
  #Process MPI settings
  ########################################################################
@@@ -485,6 -555,25 +498,6 @@@ if(WIN32 AND NOT CYGWIN
      add_definitions(-DNOMINMAX)
  endif()
  
 -# only bother with finding git and using version.h if the source is a git repo
 -if(GMX_GIT_VERSION_INFO)
 -    if (NOT SOURCE_IS_GIT_REPOSITORY)
 -        message(FATAL_ERROR
 -            "Cannot generate git version information from source tree not under git. "
 -            "Set GMX_GIT_VERSION_INFO=OFF to proceed.")
 -    endif()
 -    # We need at least git v1.5.3 be able to parse git's date output. If not
 -    # found or the version is too small, we can't generate version information.
 -    find_package(Git)
 -
 -    if(NOT GIT_FOUND OR GIT_VERSION_STRING VERSION_LESS "1.5.3")
 -        message(FATAL_ERROR
 -            "No compatible git version found (>= 1.5.3 required). "
 -            "Won't be able to generate development version information. "
 -            "Set GMX_GIT_VERSION_INFO=OFF to proceed.")
 -    endif()
 -endif()
 -
  # Detect boost unless GMX_EXTERNAL_BOOST is explicitly OFF
  # Used for default if GMX_EXTERNAL_BOOST is not defined (first CMake pass)
  if(NOT DEFINED GMX_EXTERNAL_BOOST OR GMX_EXTERNAL_BOOST)
@@@ -527,6 -616,12 +540,6 @@@ if(GMX_USE_TNG AND NOT GMX_EXTERNAL_TNG
      gmx_test_zlib(HAVE_ZLIB)
  endif()
  
 -########################################################################
 -# Generate development version info for cache
 -########################################################################
 -# set(GEN_VERSION_INFO_INTERNAL "ON")
 -# include(gmxGenerateVersionString)
 -
  ########################################################################
  # Our own GROMACS tests
  ########################################################################
@@@ -536,6 -631,10 +549,6 @@@ include_directories(BEFORE ${CMAKE_SOUR
  include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/include)
  # Required for config.h, maybe should only be set in src/CMakeLists.txt
  include_directories(BEFORE ${CMAKE_BINARY_DIR}/src)
 -# Required for gmx_header_config_gen.h to be found before installation
 -include_directories(BEFORE ${CMAKE_BINARY_DIR}/src/gromacs/utility)
 -# Required for now to make old code compile
 -include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders)
  
  include(gmxTestInlineASM)
  gmx_test_inline_asm_gcc_x86(GMX_X86_GCC_INLINE_ASM)
@@@ -729,6 -828,7 +742,6 @@@ else(
      set(GMX_EXE_LINKER_FLAGS ${GMX_EXE_LINKER_FLAGS} ${OpenMP_LINKER_FLAGS})
      set(GMX_SHARED_LINKER_FLAGS ${GMX_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS})
  endif()
 -set(PKG_CFLAGS "${PKG_CFLAGS} ${OpenMP_C_FLAGS}")
  
  ########################################################################
  # Specify install locations
@@@ -745,24 -845,17 +758,24 @@@ mark_as_advanced(GMX_LIB_INSTALL_DIR GM
  
  # These variables are used internally to provide a central location for
  # customizing the install locations.
 -set(LIB_INSTALL_DIR  ${GMX_LIB_INSTALL_DIR})
 -set(BIN_INSTALL_DIR  bin)
 -set(DATA_INSTALL_DIR share/${GMX_DATA_INSTALL_DIR})
 -set(MAN_INSTALL_DIR  share/man)
 -set(INCL_INSTALL_DIR include)
 +set(LIB_INSTALL_DIR       ${GMX_LIB_INSTALL_DIR})
 +set(BIN_INSTALL_DIR       bin)
 +set(DATA_INSTALL_DIR      share/${GMX_DATA_INSTALL_DIR})
 +set(MAN_INSTALL_DIR       share/man)
 +# If the nesting level wrt. the installation root is changed,
 +# gromacs-config.cmake.cmakein needs to be adapted.
 +set(CMAKE_INSTALL_DIR     share/cmake)
 +# TODO: Make GMXRC adapt if this is changed
 +set(PKGCONFIG_INSTALL_DIR ${LIB_INSTALL_DIR}/pkgconfig)
 +set(INCL_INSTALL_DIR      include)
  
  # These variables get written into config.h for use in finding the data
  # directories.
  set(GMXLIB_SEARCH_DIR share/${GMX_DATA_INSTALL_DIR}/top)
  set(GMXLIB_FALLBACK   ${CMAKE_INSTALL_PREFIX}/${DATA_INSTALL_DIR}/top)
  
 +list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${INCL_INSTALL_DIR})
 +
  # Binary and library suffix options
  include(gmxManageSuffixes)
  
@@@ -808,14 -901,15 +821,14 @@@ install(FILES COPYING DESTINATION ${DAT
  
  if(GMX_EXTERNAL_BOOST)
      include_directories(${Boost_INCLUDE_DIRS})
 -    set(PKG_CFLAGS "${PKG_CFLAGS} -I${Boost_INCLUDE_DIRS}")
 +    list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${Boost_INCLUDE_DIRS})
  else()
      include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/boost)
 +    list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${INCL_INSTALL_DIR}/gromacs/external/boost)
 +    list(APPEND INSTALLED_HEADED_DEFINITIONS "-DBOOST_NO_TYPEID")
      # typeid not supported for minimal internal version
      # (would add significant amount of code)
      add_definitions(-DBOOST_NO_TYPEID)
 -    # TODO: Propagate the above settings to the installed CMakeFiles.txt template
 -    # (from share/template/)
 -    set(PKG_CFLAGS "${PKG_CFLAGS} -DBOOST_NO_TYPEID -I${CMAKE_INSTALL_PREFIX}/${INCL_INSTALL_DIR}/gromacs/external/boost")
      if (NOT GMX_BUILD_MDRUN_ONLY)
          install(DIRECTORY ${CMAKE_SOURCE_DIR}/src/external/boost/boost
                  DESTINATION ${INCL_INSTALL_DIR}/gromacs/external/boost
diff --combined cmake/gmxCFlags.cmake
index 799bea1c7bc8f5438ea74a2f857dcd8b57ec61d2,6ad1bf83f5fdeb794f27695345afa17c44e82152..c0eff548ba523da0122f679e4cfc7c064d125d58
@@@ -64,7 -64,7 +64,7 @@@ function(gmx_set_cmake_compiler_flags
          # be set up elsewhere and passed to this function, but it is
          # inconvenient in CMake to pass more than one list, and such a
          # list is only used here.
 -        foreach(build_type RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL)
 +        foreach(build_type RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL PROFILE)
              set(GMXC_${language}FLAGS_${build_type} "${GMXC_${language}FLAGS_RELEASE}")
          endforeach()
          # Copy the flags that are only used by the real Release build
@@@ -110,12 -110,14 +110,12 @@@ MACRO(gmx_c_flags
          # Since 4.8 on by default. For previous version disabling is a no-op. Only disabling for Release because with assert
          # the warnings are OK.
          GMX_TEST_CFLAG(CFLAGS_WARN_REL "-Wno-array-bounds" GMXC_CFLAGS_RELEASE_ONLY)
 -        # Since gcc 4.8 strict - false postives with old gmx_fatal. TODO: Remove in master
 -        GMX_TEST_CFLAG(CFLAGS_WARN_UNINIT "-Wno-maybe-uninitialized" GMXC_CFLAGS)
          if(CYGWIN)
              GMX_TEST_CFLAG(CFLAGS_WARN_SUBSCRIPT "-Wno-char-subscripts" GMXC_CFLAGS)
          endif()
          # new in gcc 4.5
          GMX_TEST_CFLAG(CFLAGS_EXCESS_PREC "-fexcess-precision=fast" GMXC_CFLAGS_RELEASE)
 -        GMX_TEST_CFLAG(CFLAGS_COPT "-fomit-frame-pointer -funroll-all-loops"
 +        GMX_TEST_CFLAG(CFLAGS_COPT "-funroll-all-loops"
                         GMXC_CFLAGS_RELEASE)
          GMX_TEST_CFLAG(CFLAGS_NOINLINE "-fno-inline" GMXC_CFLAGS_DEBUG)
      endif()
          GMX_TEST_CFLAG(CXXFLAGS_WARN_REL "-Wno-array-bounds" GMXC_CXXFLAGS_RELEASE_ONLY)
          # new in gcc 4.5
          GMX_TEST_CXXFLAG(CXXFLAGS_EXCESS_PREC "-fexcess-precision=fast" GMXC_CXXFLAGS_RELEASE)
 -        GMX_TEST_CXXFLAG(CXXFLAGS_COPT "-fomit-frame-pointer -funroll-all-loops"
 +        GMX_TEST_CXXFLAG(CXXFLAGS_COPT "-funroll-all-loops"
                           GMXC_CXXFLAGS_RELEASE)
          GMX_TEST_CXXFLAG(CXXFLAGS_NOINLINE "-fno-inline" GMXC_CXXFLAGS_DEBUG)
      endif()
          GMX_TEST_CXXFLAG(CXXFLAGS_WARN_EXTRA "-Wextra -Wno-missing-field-initializers -Wpointer-arith" GMXC_CXXFLAGS)
      endif()
  
+     # Fujitsu compilers on PrimeHPC/Sparc64
+     if(${CMAKE_C_COMPILER_ID} MATCHES Fujitsu OR
+        (${CMAKE_C_COMPILER_ID} MATCHES unknown AND ${CMAKE_C_COMPILER} MATCHES ^fcc))
+         GMX_TEST_CFLAG(CFLAG_GNUCOMPAT "-Xg -w" GMXC_CFLAGS)
+         GMX_TEST_CFLAG(CFLAG_OPT "-Kfast,reduction,swp,simd=2,uxsimd,fsimple -x100" GMXC_CFLAGS)
+     endif()
+     if(${CMAKE_CXX_COMPILER_ID} MATCHES Fujitsu OR
+        (${CMAKE_CXX_COMPILER_ID} MATCHES unknown AND ${CMAKE_CXX_COMPILER} MATCHES ^FCC))
+         GMX_TEST_CXXFLAG(CXXFLAG_GNUCOMPAT "-Xg -w" GMXC_CXXFLAGS)
+         GMX_TEST_CXXFLAG(CXXFLAG_OPT "-Kfast,reduction,swp,simd=2,uxsimd,fsimple -x100" GMXC_CXXFLAGS)
+     endif()
      # now actually set the flags:
      if (NOT GMX_SKIP_DEFAULT_CFLAGS)
          gmx_set_cmake_compiler_flags()
diff --combined cmake/gmxDetectSimd.cmake
index a8ac28260544cf0ddaa490571bee82a348bce7d2,199c4c3cde9469c0c3ee3872a4f256bef265344b..49fa77a1fcced9b6d45b15bc022d0999578222a2
@@@ -62,7 -62,7 +62,7 @@@ function(gmx_suggest_x86_simd _suggeste
      message(STATUS "Detecting best SIMD instructions for this CPU")
  
      # Get CPU SIMD properties information
 -    set(_compile_definitions "${GCC_INLINE_ASM_DEFINE} -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders -DGMX_CPUID_STANDALONE")
 +    set(_compile_definitions "${GCC_INLINE_ASM_DEFINE} -I${CMAKE_SOURCE_DIR}/src -DGMX_CPUID_STANDALONE")
      if(GMX_TARGET_X86)
          set(_compile_definitions "${_compile_definitions} -DGMX_TARGET_X86")
      endif()
@@@ -94,6 -94,10 +94,10 @@@ function(gmx_detect_simd _suggested_sim
      if(NOT DEFINED GMX_SIMD)
          if(GMX_TARGET_BGQ)
              set(${_suggested_simd} "IBM_QPX")
+         elseif(GMX_TARGET_FUJITSU_SPARC64)
+             # HPC-ACE is always present. In the future we
+             # should add detection for HPC-ACE2 here.
+             set(${_suggested_simd} "Sparc64_HPC_ACE")
          elseif(GMX_TARGET_X86)
              gmx_suggest_x86_simd(${_suggested_simd})
          else()
diff --combined docs/install-guide/install-guide.md
index d34a3b5d67c9c3280651c36cf34454b7a942ed53,67185db2cf2768ae17549ea0490c8c4c2535377c..3ce470db15e828f68a464632aed8621b09552f20
@@@ -650,13 -650,6 +650,13 @@@ CMakeLists.txt
      is determined by CMake.
      The name of the directory can be changed using `GMX_LIB_INSTALL_DIR` CMake
      variable.
 +`lib/pkgconfig/`
 +  : Information about the installed `libgromacs` library for `pkg-config` is
 +    installed here.  The `lib/` part adapts to the installation location of the
 +    libraries.  The installed files contain the installation prefix as absolute
 +    paths.
 +`share/cmake/`
 +  : CMake package configuration files are installed here.
  `share/gromacs/`
    : Various data files and some documentation go here.
      The `gromacs` part can be changed using `GMX_DATA_INSTALL_DIR`. Using this
@@@ -931,7 -924,7 +931,7 @@@ The recommended configuration is to us
  
      cmake .. -DCMAKE_C_COMPILER=mpicc \
               -DCMAKE_CXX_COMPILER=mpicxx \
-              -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-CXX \
+              -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-CXX.cmake \
               -DCMAKE_PREFIX_PATH=/your/fftw/installation/prefix \
               -DGMX_MPI=ON \
               -DGMX_BUILD_MDRUN_ONLY=ON
@@@ -964,8 -957,25 +964,25 @@@ add it. The default plain C kernels wil
  
  This is the architecture of the K computer, which uses Fujitsu
  `Sparc64VIIIfx` chips. On this platform, GROMACS @PROJECT_VERSION@ has
- accelerated group kernels, no accelerated Verlet kernels, and a custom
- build toolchain.
+ accelerated group kernels using the HPC-ACE instructions, no
+ accelerated Verlet kernels, and a custom build toolchain. Since this
+ particular chip only does double-precision SIMD, the default setup
+ is to build GROMACS in double precision. Since most users only need single
+ precision, we have added an option `GMX_RELAXED_DOUBLE_PRECISION` to accept
+ single-precision square-root accuracy in the group kernels; unless you know
+ that you really need 15 digits of accuracy in each individual force, we
+ strongly recommend you use this. Note that all summation and other operations
+ are still done in double precision.
+ The recommended configuration is to use
+     cmake .. -DCMAKE_TOOLCHAIN_FILE=Toolchain-Fujitsu-Sparc64-mpi.cmake \
+              -DCMAKE_PREFIX_PATH=/your/fftw/installation/prefix \
+              -DCMAKE_INSTALL_PREFIX=/where/gromacs/should/be/installed \
+              -DGMX_MPI=ON \
+              -DGMX_BUILD_MDRUN_ONLY=ON \
+              -DGMX_RELAXED_DOUBLE_PRECISION=ON
+     make
+     make install
  
  ### Intel Xeon Phi ###
  
diff --combined src/config.h.cmakein
index 32e4cf00417eed7dd4d539d55b3e1013cfd9d812,a71fdad597d4afdb2f5d77b0018b247b4e7038ff..4fdf9d7369a3755e0fa73ef3a751dd3a2590dc6b
@@@ -41,6 -41,8 +41,8 @@@
   *
   * \inlibraryapi
   */
+ #ifndef GMX_CONFIG_H
+ #define GMX_CONFIG_H
  #include "gromacs/utility/gmx_header_config.h"
  
  /* TODO: For now, disable Doxygen warnings from here */
  /* Binary directory for the build */
  #cmakedefine CMAKE_BINARY_DIR "@CMAKE_BINARY_DIR@"
  
 -/* Turn off water-water neighborlist optimization only - not used right now */
 -#cmakedefine DISABLE_WATERWATER_NLIST
 -
 -/* Turn off all water neighborlist optimization - not used right now */
 -#cmakedefine DISABLE_WATER_NLIST
 -
  /* IEEE754 floating-point format. Memory layout is defined by macros
   * GMX_IEEE754_BIG_ENDIAN_BYTE_ORDER and GMX_IEEE754_BIG_ENDIAN_WORD_ORDER. 
   */
  /* Target platform is BlueGene/Q */
  #cmakedefine GMX_TARGET_BGQ
  
 +/** Define if we are building for Cygwin */
 +#cmakedefine GMX_CYGWIN
 +
 +/** Define if we have sufficient C++11 support */
 +#cmakedefine GMX_CXX11
 +
  /* GCC bug in AVX maskload/maskstore arguments - worked around internally */
  #cmakedefine GMX_SIMD_X86_AVX_GCC_MASKLOAD_BUG
  
  /* Define if SIGUSR1 is present */
  #cmakedefine HAVE_SIGUSR1
  
 -/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */
 -#cmakedefine _LARGEFILE_SOURCE
 -
 -/* Define for large files, on AIX-style hosts. */
 -#cmakedefine _LARGE_FILES
 -
 -/* Some systems requires this to be set to 64 for large file support */
 -#cmakedefine _FILE_OFFSET_BITS @_FILE_OFFSET_BITS@
 -
 -/* Build special-purpose mdrun library */
 -#cmakedefine GMX_FAHCORE   
 -
  /* Enable gromacs quotes */
  #cmakedefine GMX_COOL_QUOTES
  
 -#ifdef GMX_FAHCORE
 -#define FULLINDIRECT 1
 -#define USE_FAH_XDR  1
 -#include "swindirect.h"
 -#endif
 -
  /* default name mangling maybe wrong on exotic plattforms */
  #define F77_FUNC(name,NAME) name ## _
  
  /* Define if we have zlib */
  #cmakedefine HAVE_ZLIB
  
+ #endif
  /*! \endcond */
index ac5f422d332e10ec9314c46f24854cefd39ab64f,01d9fa7401cd23045722dbaede233a6edf66251c..b026f8b0db6382f5ac1fec59895ca35c8d7c8188
@@@ -39,8 -39,6 +39,8 @@@
   * \author Teemu Murtola <teemu.murtola@gmail.com>
   * \ingroup module_commandline
   */
 +#include "gmxpre.h"
 +
  #include "cmdlinemodulemanager.h"
  
  #include <cstdio>
  #include <string>
  #include <utility>
  
 +#include "config.h"
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
  #include "gromacs/legacyheaders/copyrite.h"
 -#include "gromacs/legacyheaders/network.h"
  
  #include "gromacs/commandline/cmdlinehelpcontext.h"
  #include "gromacs/commandline/cmdlinehelpmodule.h"
  #include "gromacs/commandline/cmdlineprogramcontext.h"
  #include "gromacs/options/basicoptions.h"
  #include "gromacs/options/options.h"
 +#include "gromacs/utility/basenetwork.h"
  #include "gromacs/utility/exceptions.h"
 +#include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxassert.h"
  #include "gromacs/utility/stringutil.h"
  
 -// For GMX_BINARY_SUFFIX
 -#include "config.h"
 -
  namespace gmx
  {
  
@@@ -120,22 -114,26 +120,22 @@@ class CMainCommandLineModule : public C
              return shortDescription_;
          }
  
 +        virtual void init(CommandLineModuleSettings * /*settings*/)
 +        {
 +        }
          virtual int run(int argc, char *argv[])
          {
              return mainFunction_(argc, argv);
          }
          virtual void writeHelp(const CommandLineHelpContext &context) const
          {
 -            char *argv[2];
 -            int   argc = 1;
 -            // TODO: The constness should not be cast away.
 -            argv[0] = const_cast<char *>(name_);
 -            argv[1] = NULL;
 -            GlobalCommandLineHelpContext global(context);
 -            mainFunction_(argc, argv);
 +            writeCommandLineHelpCMain(context, name_, mainFunction_);
          }
  
      private:
          const char             *name_;
          const char             *shortDescription_;
          CMainFunction           mainFunction_;
 -
  };
  
  //! \}
  
  CommandLineCommonOptionsHolder::CommandLineCommonOptionsHolder()
      : options_(NULL, NULL), bHelp_(false), bHidden_(false),
 -      bQuiet_(false), bVersion_(false), bCopyright_(true)
 +      bQuiet_(false), bVersion_(false), bCopyright_(true),
 +      niceLevel_(19), debugLevel_(0)
  {
      binaryInfoSettings_.copyright(true);
  }
@@@ -171,12 -168,6 +171,12 @@@ void CommandLineCommonOptionsHolder::in
                             .description("Print extended version information and quit"));
      options_.addOption(BooleanOption("copyright").store(&bCopyright_)
                             .description("Print copyright information on startup"));
 +    options_.addOption(IntegerOption("nice").store(&niceLevel_)
 +                           .description("Set the nicelevel (default depends on command)"));
 +    options_.addOption(IntegerOption("debug").store(&debugLevel_)
 +                           .hidden().defaultValueIfSet(1)
 +                           .description("Write file with debug information, "
 +                                        "1: short (default), 2: also x and f"));
  }
  
  bool CommandLineCommonOptionsHolder::finishOptions()
      return !bVersion_;
  }
  
 +void CommandLineCommonOptionsHolder::adjustFromSettings(
 +        const CommandLineModuleSettings &settings)
 +{
 +    if (!options_.isSet("nice"))
 +    {
 +        niceLevel_ = settings.defaultNiceLevel();
 +    }
 +}
 +
  /********************************************************************
   * CommandLineModuleManager::Impl
   */
@@@ -530,7 -512,7 +530,7 @@@ void CommandLineModuleManager::addHelpT
  int CommandLineModuleManager::run(int argc, char *argv[])
  {
      CommandLineModuleInterface    *module;
 -    const bool                     bMaster = (!gmx_mpi_initialized() || gmx_node_rank() == 0);
 +    const bool                     bMaster = (gmx_node_rank() == 0);
      bool                           bQuiet  = impl_->bQuiet_ || !bMaster;
      CommandLineCommonOptionsHolder optionsHolder;
      try
      {
          return 0;
      }
 -    int rc = module->run(argc, argv);
 +
 +    CommandLineModuleSettings settings;
 +    module->init(&settings);
 +    optionsHolder.adjustFromSettings(settings);
 +
 +    // Open the debug file.
 +    if (optionsHolder.debugLevel() > 0)
 +    {
 +        std::string filename(impl_->programContext_.programName());
 +        if (gmx_node_num() > 1)
 +        {
 +            filename.append(formatString("%d", gmx_node_rank()));
 +        }
 +        filename.append(".debug");
 +
 +        fprintf(stderr, "Will write debug log file: %s\n", filename.c_str());
 +        gmx_init_debug(optionsHolder.debugLevel(), filename.c_str());
 +    }
- #if defined(HAVE_UNISTD_H) && !defined(GMX_NO_NICE)
++#if defined(HAVE_UNISTD_H) && !defined(GMX_NO_NICE) && !defined(__MINGW32__)
 +    // Set the nice level unless disabled in the configuration.
 +    if (optionsHolder.niceLevel() != 0)
 +    {
 +        static bool bNiceSet = false; // Only set it once.
 +        if (!bNiceSet)
 +        {
 +            if (nice(optionsHolder.niceLevel()) == -1)
 +            {
 +                // Do nothing, but use the return value to avoid warnings.
 +            }
 +            bNiceSet = true;
 +        }
 +    }
 +#endif
 +
 +    int rc = 0;
 +    if (!(module == impl_->helpModule_ && !bMaster))
 +    {
 +        rc = module->run(argc, argv);
 +    }
      if (!bQuiet)
      {
          gmx_thanx(stderr);
index 20932d54c37627bcd7e91c4f145d2701f3058619,35c94c2eeb6e2766a4e34d74b9e3040d6cbfcd42..b677014564098d9ab2c65d5d4cf6a220ea1ec600
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 +#include "gmxpre.h"
 +
  #include "tngio.h"
  
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "config.h"
  
  #ifdef HAVE_UNISTD_H
  #include <unistd.h>
  #endif
  
  #include "gromacs/legacyheaders/copyrite.h"
 -#include "gromacs/legacyheaders/gmx_fatal.h"
 -#include "gromacs/legacyheaders/main.h"
 -#include "gromacs/legacyheaders/physics.h"
 +#include "gromacs/legacyheaders/types/ifunc.h"
 +
 +#include "gromacs/fileio/gmxfio.h"
 +#include "gromacs/math/units.h"
  #include "gromacs/math/utilities.h"
 +#include "gromacs/topology/topology.h"
 +#include "gromacs/utility/basenetwork.h"
  #include "gromacs/utility/common.h"
 +#include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxassert.h"
  #include "gromacs/utility/programcontext.h"
 -#include "gmxfio.h"
  
  static const char *modeToVerb(char mode)
  {
@@@ -150,7 -147,7 +150,7 @@@ void gmx_tng_open(const char       *fil
  //             tng_last_program_name_set(*tng, programInfo);
  //         }
  
- #ifdef HAVE_UNISTD_H
+ #if defined(HAVE_UNISTD_H) && !defined(__MINGW32__)
          char username[256];
          if (!getlogin_r(username, 256))
          {
index 670aef0d7ba73440e59a0f4fa4c3f69b212dcbad,7d9c67c779d33613f2baf4c9d32d2be47739eb1c..b4ed95239e8e7eb757ca71064385ad310a290a16
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 +#include "gmxpre.h"
  
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "vmdio.h"
 +
 +#include "config.h"
  
  /* Derived from PluginMgr.C and catdcd.c */
  
@@@ -91,6 -90,7 +91,6 @@@
  #include <string.h>
  #include <assert.h>
  
 -#include <config.h>
  /*
   * Plugin header files; get plugin source from www.ks.uiuc.edu/Research/vmd"
   */
  #ifndef GMX_NATIVE_WINDOWS
  #include <glob.h>
  #else
+ #ifndef _WIN32_IE
+ #define _WIN32_IE 0x0500 /* SHGetFolderPath is available since WinXP/IE5 */
+ #endif
  #include <windows.h>
  #include <shlobj.h>
  #endif
 -#include "gromacs/utility/smalloc.h"
 -#include "futil.h"
 -#include "vmdio.h"
 -
  
 -#include "types/simple.h"
 -#include "vec.h"
 -#include "gmxfio.h"
 +#include "gromacs/fileio/gmxfio.h"
 +#include "gromacs/fileio/trx.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/utility/basedefinitions.h"
 +#include "gromacs/utility/futil.h"
 +#include "gromacs/utility/smalloc.h"
  
  
  typedef int (*initfunc)(void);
index 9162172255cd75e9164027d12f1717640173565d,18c0a312a61cd3940d1c7d84573ff7f501935137..b2c9c37e9704f25be89963b3a9893a7e9605d61a
  
  /* The source code in this file should be thread-safe.
     Please keep it that way. */
 +#include "gmxpre.h"
  
 +#include "gromacs/legacyheaders/checkpoint.h"
  
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "config.h"
  
 +#include <errno.h>
 +#include <stdlib.h>
  #include <string.h>
  #include <time.h>
  
 +#include <fcntl.h>
  #ifdef HAVE_SYS_TIME_H
  #include <sys/time.h>
  #endif
  #include <sys/locking.h>
  #endif
  
 -#include "copyrite.h"
 -#include "names.h"
 -#include "typedefs.h"
 -#include "types/commrec.h"
 -#include "gromacs/utility/smalloc.h"
 -#include "txtdump.h"
 -#include "vec.h"
 -#include "network.h"
 -#include "checkpoint.h"
 -#include "main.h"
 -#include "gromacs/utility/cstringutil.h"
 -#include <fcntl.h>
 +#include "gromacs/legacyheaders/copyrite.h"
 +#include "gromacs/legacyheaders/names.h"
 +#include "gromacs/legacyheaders/typedefs.h"
 +#include "gromacs/legacyheaders/types/commrec.h"
 +#include "gromacs/legacyheaders/txtdump.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/network.h"
  
  #include "gromacs/fileio/filenm.h"
 -#include "gromacs/fileio/futil.h"
 +#include "gromacs/utility/futil.h"
  #include "gromacs/fileio/gmxfio.h"
  #include "gromacs/fileio/xdrf.h"
  #include "gromacs/fileio/xdr_datatype.h"
 +#include "gromacs/utility/basenetwork.h"
  #include "gromacs/utility/baseversion.h"
 -#include "gmx_fatal.h"
 +#include "gromacs/utility/cstringutil.h"
 +#include "gromacs/utility/fatalerror.h"
 +#include "gromacs/utility/smalloc.h"
  
  #include "buildinfo.h"
  
@@@ -169,6 -168,7 +169,6 @@@ gmx_wintruncate(const char *filename, _
      return 0;
  #else
      FILE *fp;
 -    int   rc;
  
      fp = fopen(filename, "rb+");
  
          return -1;
      }
  
+ #ifdef _MSC_VER
      return _chsize_s( fileno(fp), size);
+ #else
+     return _chsize( fileno(fp), size);
+ #endif
  #endif
  }
  #endif
@@@ -463,8 -467,6 +467,8 @@@ static int do_cpte_reals_low(XDR *xd, i
      {
          if (dtc == xdr_datatype_double)
          {
 +            /* cppcheck-suppress invalidPointerCast
 +             * Only executed if real is anyhow double */
              vd = (double *)vp;
          }
          else
@@@ -533,6 -535,8 +537,6 @@@ static int do_cpte_n_reals(XDR *xd, in
  static int do_cpte_real(XDR *xd, int cptp, int ecpt, int sflags,
                          real *r, FILE *list)
  {
 -    int n;
 -
      return do_cpte_reals_low(xd, cptp, ecpt, sflags, 1, NULL, &r, list, ecprREAL);
  }
  
@@@ -542,7 -546,7 +546,7 @@@ static int do_cpte_ints(XDR *xd, int cp
      bool_t res = 0;
      int    dtc = xdr_datatype_int;
      int   *vp, *va = NULL;
 -    int    nf, dt, i;
 +    int    nf, dt;
  
      nf  = n;
      res = xdr_int(xd, &nf);
@@@ -609,7 -613,7 +613,7 @@@ static int do_cpte_doubles(XDR *xd, in
      bool_t  res = 0;
      int     dtc = xdr_datatype_double;
      double *vp, *va = NULL;
 -    int     nf, dt, i;
 +    int     nf, dt;
  
      nf  = n;
      res = xdr_int(xd, &nf);
@@@ -674,6 -678,8 +678,6 @@@ static int do_cpte_double(XDR *xd, int 
  static int do_cpte_rvecs(XDR *xd, int cptp, int ecpt, int sflags,
                           int n, rvec **v, FILE *list)
  {
 -    int n3;
 -
      return do_cpte_reals_low(xd, cptp, ecpt, sflags,
                               n*DIM, NULL, (real **)v, list, ecprRVEC);
  }
@@@ -682,7 -688,7 +686,7 @@@ static int do_cpte_matrix(XDR *xd, int 
                            matrix v, FILE *list)
  {
      real *vr;
 -    real  ret;
 +    int   ret;
  
      vr  = (real *)&(v[0][0]);
      ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
@@@ -701,7 -707,8 +705,7 @@@ static int do_cpte_nmatrix(XDR *xd, in
                             int n, real **v, FILE *list)
  {
      int   i;
 -    real *vr;
 -    real  ret, reti;
 +    int   ret, reti;
      char  name[CPTSTRLEN];
  
      ret = 0;
      }
      for (i = 0; i < n; i++)
      {
 -        reti = 0;
 -        vr   = v[i];
          reti = do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, &(v[i]), NULL, ecprREAL);
          if (list && reti == 0)
          {
              sprintf(name, "%s[%d]", st_names(cptp, ecpt), i);
              pr_reals(list, 0, name, v[i], n);
          }
 -        if (reti == 0)
 +        if (reti != 0)
          {
 -            ret = 0;
 +            ret = reti;
          }
      }
      return ret;
@@@ -813,6 -822,7 +817,6 @@@ static void do_cpt_header(XDR *xd, gmx_
      bool_t res = 0;
      int    magic;
      int    idum = 0;
 -    int    i;
      char  *fhost;
  
      if (bRead)
@@@ -1273,6 -1283,7 +1277,6 @@@ static int do_cpt_enerhist(XDR *xd, gmx
          {
              enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
          }
 -        fflags |= (1<<eenhENERGY_SUM_SIM);
      }
  
      if ( (fflags & (1<<eenhENERGY_NSUM)) &&
      {
          /* Assume we have an old file format and copy nsum to nsteps */
          enerhist->nsteps = enerhist->nsum;
 -        fflags          |= (1<<eenhENERGY_NSTEPS);
      }
      if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
           !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
      {
          /* Assume we have an old file format and copy nsum to nsteps */
          enerhist->nsteps_sim = enerhist->nsum_sim;
 -        fflags              |= (1<<eenhENERGY_NSTEPS_SIM);
      }
  
      return ret;
@@@ -1337,7 -1350,7 +1341,7 @@@ static int do_cpt_df_hist(XDR *xd, int 
  static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
                            edsamstate_t *EDstate, FILE *list)
  {
 -    int  i, j;
 +    int  i;
      int  ret = 0;
      char buf[STRLEN];
  
@@@ -1399,7 -1412,7 +1403,7 @@@ static int do_cpt_files(XDR *xd, gmx_bo
                          gmx_file_position_t **p_outputfiles, int *nfiles,
                          FILE *list, int file_version)
  {
 -    int                  i, j;
 +    int                  i;
      gmx_off_t            offset;
      gmx_off_t            mask = 0xFFFFFFFFL;
      int                  offset_high, offset_low;
@@@ -1503,12 -1516,12 +1507,12 @@@ void write_checkpoint(const char *fn, g
      char                *fntemp; /* the temporary checkpoint file name */
      time_t               now;
      char                 timebuf[STRLEN];
 -    int                  nppnodes, npmenodes, flag_64bit;
 +    int                  nppnodes, npmenodes;
      char                 buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
      gmx_file_position_t *outputfiles;
      int                  noutputfiles;
      char                *ftime;
 -    int                  flags_eks, flags_enh, flags_dfh, i;
 +    int                  flags_eks, flags_enh, flags_dfh;
      t_fileio            *ret;
  
      if (DOMAINDECOMP(cr))
@@@ -1795,8 -1808,8 +1799,8 @@@ static void check_match(FILE *fplog
           */
          int   gmx_major, gmx_minor;
          int   cpt_major, cpt_minor;
 -        sscanf(gmx_version(), "VERSION %d.%d", &gmx_major, &gmx_minor);
 -        sscanf(version, "VERSION %d.%d", &cpt_major, &cpt_minor);
 +        sscanf(gmx_version(), "VERSION %5d.%5d", &gmx_major, &gmx_minor);
 +        sscanf(version, "VERSION %5d.%5d", &cpt_major, &cpt_minor);
          version_differs = (gmx_major != cpt_major || gmx_minor != cpt_minor);
      }
  
@@@ -1875,8 -1888,8 +1879,8 @@@ static void read_checkpoint(const char 
      int                  file_version;
      char                *version, *btime, *buser, *bhost, *fprog, *ftime;
      int                  double_prec;
 -    char                 filename[STRLEN], buf[STEPSTRSIZE];
 -    int                  nppnodes, eIntegrator_f, nppnodes_f, npmenodes_f;
 +    char                 buf[STEPSTRSIZE];
 +    int                  eIntegrator_f, nppnodes_f, npmenodes_f;
      ivec                 dd_nc_f;
      int                  natoms, ngtc, nnhpres, nhchainlength, nlambda, fflags, flags_eks, flags_enh, flags_dfh;
      int                  d;
  
      if (!PAR(cr))
      {
 -        nppnodes      = 1;
          cr->npmenodes = 0;
      }
      else if (cr->nnodes == nppnodes_f + npmenodes_f)
          {
              cr->npmenodes = npmenodes_f;
          }
 -        nppnodes = cr->nnodes - cr->npmenodes;
 +        int nppnodes = cr->nnodes - cr->npmenodes;
          if (nppnodes == nppnodes_f)
          {
              for (d = 0; d < DIM; d++)
              }
          }
      }
 -    else
 -    {
 -        /* The number of PP nodes has not been set yet */
 -        nppnodes = -1;
 -    }
  
      if (fflags != state->flags)
      {
@@@ -2451,6 -2470,8 +2455,6 @@@ void list_checkpoint(const char *fn, FI
      ivec                 dd_nc;
      t_state              state;
      int                  flags_eks, flags_enh, flags_dfh;
 -    int                  indent;
 -    int                  i, j;
      int                  ret;
      gmx_file_position_t *outputfiles;
      int                  nfiles;
index bdbda35b857433adc6fc995dc075206ba286ce12,020e37aa55f41544d69724a70708f5113ecd4941..c786c0e5bb1324a72de26c3f1ed3ca71f5545a17
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 +#include "gmxpre.h"
 +
  #ifdef HAVE_CONFIG_H
 -#include <config.h>
 +#include "config.h"
  #endif
  
  #ifdef HAVE_SCHED_H
  #include <stdlib.h>
  #include <string.h>
  #include <ctype.h>
- #ifdef _MSC_VER
+ #ifdef GMX_NATIVE_WINDOWS
  /* MSVC definition for __cpuid() */
- #include <intrin.h>
+     #ifdef _MSC_VER
+         #include <intrin.h>
+     #endif
  /* sysinfo functions */
- #include <windows.h>
    #include <windows.h>
  #endif
  #ifdef HAVE_UNISTD_H
  /* sysconf() definition */
- #include <unistd.h>
    #include <unistd.h>
  #endif
  
 -#include "gmx_cpuid.h"
 +#include "gromacs/legacyheaders/gmx_cpuid.h"
  
  
  
@@@ -469,7 -469,6 +471,7 @@@ cpuid_renumber_elements(int *data, int 
              }
          }
      }
 +    free(unique);
      return nunique;
  }
  
index 385856f71ddef3c5bdc01760f4c0f62b76b1fc0f,d38b2cfba1f30a5f51a665df5a106504038456a8..703ec4cf713c4196ed8053800fc497f707ddda66
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
- #if defined(HAVE_SCHED_H)
+ #ifdef HAVE_SCHED_AFFINITY
  #  ifndef _GNU_SOURCE
  #    define _GNU_SOURCE 1
  #  endif
  
  #include "thread_mpi/threads.h"
  
 -#include "typedefs.h"
 -#include "types/commrec.h"
 -#include "types/hw_info.h"
 -#include "copyrite.h"
 -#include "gmx_cpuid.h"
 -#include "gmx_omp_nthreads.h"
 -#include "md_logging.h"
 -#include "gmx_thread_affinity.h"
 -
 -#include "gmx_fatal.h"
 +#include "gromacs/legacyheaders/typedefs.h"
 +#include "gromacs/legacyheaders/types/commrec.h"
 +#include "gromacs/legacyheaders/types/hw_info.h"
 +#include "gromacs/legacyheaders/copyrite.h"
 +#include "gromacs/legacyheaders/gmx_cpuid.h"
 +#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 +#include "gromacs/legacyheaders/md_logging.h"
 +#include "gromacs/legacyheaders/gmx_thread_affinity.h"
 +
 +#include "gromacs/utility/basenetwork.h"
 +#include "gromacs/utility/cstringutil.h"
 +#include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxomp.h"
 +#include "gromacs/utility/smalloc.h"
  
  static int
  get_thread_affinity_layout(FILE *fplog,
@@@ -374,53 -371,18 +374,53 @@@ gmx_set_thread_affinity(FIL
   * Note that this will only work on Linux as we use a GNU feature.
   */
  void
 -gmx_check_thread_affinity_set(FILE            gmx_unused *fplog,
 -                              const t_commrec gmx_unused *cr,
 -                              gmx_hw_opt_t    gmx_unused *hw_opt,
 -                              int             gmx_unused  nthreads_hw_avail,
 -                              gmx_bool        gmx_unused  bAfterOpenmpInit)
 +gmx_check_thread_affinity_set(FILE            *fplog,
 +                              const t_commrec *cr,
 +                              gmx_hw_opt_t    *hw_opt,
 +                              int  gmx_unused  nthreads_hw_avail,
 +                              gmx_bool         bAfterOpenmpInit)
  {
  #ifdef HAVE_SCHED_AFFINITY
      cpu_set_t mask_current;
      int       i, ret, cpu_count, cpu_set;
      gmx_bool  bAllSet;
 +#endif
  
      assert(hw_opt);
 +    if (!bAfterOpenmpInit)
 +    {
 +        /* Check for externally set OpenMP affinity and turn off internal
 +         * pinning if any is found. We need to do this check early to tell
 +         * thread-MPI whether it should do pinning when spawning threads.
 +         * TODO: the above no longer holds, we should move these checks later
 +         */
 +        if (hw_opt->thread_affinity != threadaffOFF)
 +        {
 +            char *message;
 +            if (!gmx_omp_check_thread_affinity(&message))
 +            {
 +                /* TODO: with -pin auto we should only warn when using all cores */
 +                md_print_warn(cr, fplog, "%s", message);
 +                sfree(message);
 +                hw_opt->thread_affinity = threadaffOFF;
 +            }
 +        }
 +
 +        /* With thread-MPI this is needed as pinning might get turned off,
 +         * which needs to be known before starting thread-MPI.
 +         * With thread-MPI hw_opt is processed here on the master rank
 +         * and passed to the other ranks later, so we only do this on master.
 +         */
 +        if (!SIMMASTER(cr))
 +        {
 +            return;
 +        }
 +#ifndef GMX_THREAD_MPI
 +        return;
 +#endif
 +    }
 +
 +#ifdef HAVE_SCHED_GETAFFINITY
      if (hw_opt->thread_affinity == threadaffOFF)
      {
          /* internal affinity setting is off, don't bother checking process affinity */
index eb63d9a8650600fd25c9d9ae18a141f8f58d026d,85d96a1233ae37527c6a5f900d9e08a68a19f0c0..fc6ca9bc67dad6cccce09f9ac0bbd7da00f6cb7c
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
 -#include "vec.h"
 -#include "typedefs.h"
 -#include "nonbonded.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/typedefs.h"
 +#include "gromacs/legacyheaders/nonbonded.h"
  #include "nb_kernel.h"
 -#include "nrnb.h"
 -#include "macros.h"
 +#include "gromacs/legacyheaders/nrnb.h"
 +#include "gromacs/legacyheaders/macros.h"
  #include "nb_free_energy.h"
  
 -#include "gmx_fatal.h"
 +#include "gromacs/utility/fatalerror.h"
  
  void
  gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
@@@ -65,8 -65,9 +65,9 @@@
  #define  NSTATES  2
      int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
      real          shX, shY, shZ;
-     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
-     real          Vcoul[NSTATES], Vvdw[NSTATES];
+     real          tx, ty, tz, Fscal;
+     double        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
+     double        Vcoul[NSTATES], Vvdw[NSTATES];     /* Needs double for sc_power==48 */
      real          rinv6, r, rt, rtC, rtV;
      real          iqA, iqB;
      real          qq[NSTATES], vctot, krsq;
@@@ -79,7 -80,7 +80,7 @@@
      double        dvdl_coul, dvdl_vdw;
      real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
      real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
-     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
+     double        rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
      real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
      int           do_tab, tab_elemsize;
      int           n0, n1C, n1V, nnn;
      bDoPotential        = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
  
      rcoulomb            = fr->rcoulomb;
 -    sh_ewald            = fr->ic->sh_ewald;
      rvdw                = fr->rvdw;
      sh_invrc6           = fr->ic->sh_invrc6;
      sh_lj_ewald         = fr->ic->sh_lj_ewald;
index d1b1ba034da53ac09af3069cde3e52c066f6af7d,6a567035f60209231581c935aec2ea54fffa6c49..d1b1ba034da53ac09af3069cde3e52c066f6af7d
mode 100755,100644..100644
@@@ -428,7 -428,8 +428,7 @@@ numKernels = 
  
  fpdecl = open('nb_kernel_' + Arch + '.c','w')
  fpdecl.write( FileHeader )
 -fpdecl.write( '#ifndef nb_kernel_' + Arch + '_h\n' )
 -fpdecl.write( '#define nb_kernel_' + Arch + '_h\n\n' )
 +fpdecl.write( '#include "gmxpre.h"\n\n' )
  fpdecl.write( '#include "../nb_kernel.h"\n\n' )
  
  for KernelElec in ElectrostaticsList:
@@@ -511,5 -512,6 +511,5 @@@ for decl in kerneldecl[0:-1]
  fpdecl.write( kerneldecl[-1] + '\n' )
  fpdecl.write( '};\n\n' )
  fpdecl.write( 'int\n' )
 -fpdecl.write( '    kernellist_'+Arch+'_size = sizeof(kernellist_'+Arch+')/sizeof(kernellist_'+Arch+'[0]);\n\n')
 -fpdecl.write( '#endif\n')
 +fpdecl.write( '    kernellist_'+Arch+'_size = sizeof(kernellist_'+Arch+')/sizeof(kernellist_'+Arch+'[0]);\n')
  fpdecl.close()
index 2e87bd52bdef2c21781d998eada64b5986c8f349,205f843c31bf0ae72a54d35594c3d0f429ac338f..3c8b96e7ba50134bef697e14f05f2e65e0334819
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -297,7 -297,8 +297,8 @@@ nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_V
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -629,7 -630,8 +630,8 @@@ nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index fbd62bab82ff2d2003417b8e6b9d69d7d1dc8577,c9fb58eb310f0b3160de047111a7754cef9658c1..310216adb6a5c8f272caf84cb46287c1606f64d4
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -437,7 -437,8 +437,8 @@@ nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_V
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -1001,7 -1002,8 +1002,8 @@@ nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 5135e7bad0b90f401277ddaa347e5b90cd65e687,1800d4f87dc86ab327621b5e4244e3988d509aa3..f1f9b8f2af375906756f25fc720986a2ca7ab47e
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -483,7 -483,8 +483,8 @@@ nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_V
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -1125,7 -1126,8 +1126,8 @@@ nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index e5c35edec6eca0ece4f07f1593246e3b72bdec80,c0d830ba08cd41fbdc3020a0cfdf52108872663f..fe3259d2276fbea632ec9b798535e9b0fd84c048
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -279,7 -279,8 +279,8 @@@ nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -573,7 -574,8 +574,8 @@@ nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sp
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 8c6b0ab5b80f62772570905c0b11705c1cb2b6d5,9b6cbc85ca7e1456d10b77739ac9771b1172012e..31d53447dce169070134feb7d8a2388ee04e643b
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -419,7 -419,8 +419,8 @@@ nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -945,7 -946,8 +946,8 @@@ nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sp
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 9ae48ae1059782d5fb15627a444ad84685e05b4a,fced48c2221e64894ce9c48b9da428a2a4910096..cb81142c880ddb2a1bded8e700cf3c2fba45cbaf
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -449,7 -449,8 +449,8 @@@ nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_s
               **************************/
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -1027,7 -1028,8 +1028,8 @@@ nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sp
               **************************/
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index 86f65975557357f4639504f28fea4234233b4283,1666f42b73e52f575dc7f3952c944e8e43aba792..111f202a7e679fced29cb87140cc29c879651fc6
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -290,7 -290,8 +290,8 @@@ nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -606,7 -607,8 +607,8 @@@ nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 473ac3463ea7012c1bf945e4205474e50560e58c,cc730d2aa31d54d0425c8a8088c55928f824bf36..e69261ef63760f33d00fd5970bff84c07786407c
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -392,7 -392,8 +392,8 @@@ nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -864,7 -865,8 +865,8 @@@ nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 32c465bdeec836f8d691ccf3616194518c32b8da,2a71b9706ba2782032ba0f26baf27690b83a09cb..48d2976c68d9a421275291b912668aadde594ab9
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -426,7 -426,8 +426,8 @@@ nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -952,7 -953,8 +953,8 @@@ nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index c323d5340503026d27dcaf9679d66fd51f57eb70,50b56ae841a7f3caf9ce931571e57663547aa8d4..42d8daba0324f870edf4f9b61ea5b64ace2c9d18
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -251,7 -251,8 +251,8 @@@ nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sp
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* COULOMB ELECTROSTATICS */
              velec            = _fjsp_mul_v2r8(qq00,rinv00);
@@@ -500,7 -501,8 +501,8 @@@ nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_spa
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* COULOMB ELECTROSTATICS */
              velec            = _fjsp_mul_v2r8(qq00,rinv00);
index af0cf054c2ea068503adc9df163b3de82fd02fca,5a8564f0ee93284cc79835c8559cecc162b1b03b..fa424c4e4a1314e980957c8f254d08f9e4a2e3f9
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -353,7 -353,8 +353,8 @@@ nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sp
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* COULOMB ELECTROSTATICS */
              velec            = _fjsp_mul_v2r8(qq00,rinv00);
@@@ -758,7 -759,8 +759,8 @@@ nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_spa
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* COULOMB ELECTROSTATICS */
              velec            = _fjsp_mul_v2r8(qq00,rinv00);
index 95667b3a43ae31370a4efbb7f854f93f0763ba06,e440c725b9c43bb1954a7da69005808a59babb4c..2beed1988a424283684abaac9e7e945353bc59bc
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -387,7 -387,8 +387,8 @@@ nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sp
               **************************/
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -846,7 -847,8 +847,8 @@@ nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_spa
               **************************/
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index 881a610e6884897dbbc2077e657af5d3cf80412c,eb524e19a788cd73c4d52adb767175da6f115af0..652086ce891c293605dab91d23b796d4c78ea1f4
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -246,11 -246,11 +246,11 @@@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_V
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* EWALD ELECTROSTATICS */
  
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@@ -595,11 -597,11 +597,11 @@@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* EWALD ELECTROSTATICS */
  
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index c2b63eb9fa0a5e6ecda9f4f1e46ba046c699aefd,c5582d564e5830396123e36911fa468c0f0175a3..046fb5655cf438140c82da1754e47ce1dfb34bdd
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -280,11 -280,11 +280,11 @@@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_V
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* EWALD ELECTROSTATICS */
  
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@@ -901,11 -903,11 +903,11 @@@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* EWALD ELECTROSTATICS */
  
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index c270f75362d8e41f56c62ba8aff546e320a9343c,99421c554db16773b4e993fbe2720fee376f9ad5..e3760142fd89d01b2c7ac6451964970cae937257
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -342,11 -342,11 +342,11 @@@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_V
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@@ -1651,11 -1651,11 +1651,11 @@@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 90c02266aafda361917580a4311d73985e453cb1,b05d570ca4b849320a86edfb83812dccf67ad6c8..d8e80bef4334bbcaeea57172bfcae301e0fd7d77
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -273,11 -273,11 +273,11 @@@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_V
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* Analytical LJ-PME */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@@ -981,11 -983,11 +983,11 @@@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* Analytical LJ-PME */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 7f3df8cfb4eab4da5119256506c0508b58cfbcc3,770162ee605d1327279e65efc7ff210bf5d6cf06..5dabbd9a3045d1c47be11df312e44b24013278a8
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -342,11 -342,11 +342,11 @@@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_V
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@@ -1744,11 -1744,11 +1744,11 @@@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 444fd809fb9df4f62ca659569329e558920f2a98,e8bd4876ec592a070f4e824e4569bf78a2b7267a..35be03c0ec1ff9c450f90f4e7d607fa771696d4d
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -300,7 -300,8 +300,8 @@@ nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
@@@ -613,7 -614,8 +614,8 @@@ nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
index 97519cd9badac14e4ed01c7b2a01887ada6b7d41,09cf8d502c2f6adfed5ce5286036940949196543..5590857dc8bd18e3d1d67f8a84fb62661099f5a6
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -456,7 -456,8 +456,8 @@@ nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
@@@ -1019,7 -1020,8 +1020,8 @@@ nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
index 2e14a949d650a8e70dae73bf67b8630d0b997709,26a7b50fae9a3ad0a7c0999afc7ef9d219972e9e..662c00981490f417001c68aec8b1da66bdf1b5ab
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -497,7 -497,8 +497,8 @@@ nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_
              {
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -1132,7 -1133,8 +1133,8 @@@ nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_s
              {
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index e794bdbecf456baca2a06102344bcb381d746c03,ab8a4f9523dae756688ad8d4626c44efaebd23dd..02c75bbaf563824ff3922da8ed8fd195171216c9
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -323,7 -323,8 +323,8 @@@ nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
@@@ -679,7 -680,8 +680,8 @@@ nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
index 51771ffc566f57ed40977ad96125a456b4bdb61d,ddccfe10d83404d3706762c26e48f7833a4861c5..86534b4b33c3c9e306c3d9dec11a6be846073fba
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -501,7 -501,8 +501,8 @@@ nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
@@@ -1161,7 -1162,8 +1162,8 @@@ nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
index 1ce2359ed68cdc2725d4455361073cbf88d8db03,c55d849689d0d1dd7dc1f20746ec6cba1ae603c4..e0e9c060889f2de130c7e7a8c941a0a939029e3a
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -557,7 -557,8 +557,8 @@@ nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -1313,7 -1314,8 +1314,8 @@@ nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_s
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index 976003f3e77c8c7acda7488d0eab1a48aed4d9d2,1d38e53cc6f6ef99db740bbd513c05d6640cf9c4..dfdcbe424bb81b62079148db5715cecc237f0034
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -312,7 -312,8 +312,8 @@@ nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -659,7 -660,8 +660,8 @@@ nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sp
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 1b40bdd173f23e38fb6b4f3544454e5f30c4a90c,9f992bf22024028da650ef409ec4940bca4db6a1..a5df390d684d5c47d370d3bfdc6b2b7502a603b0
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -448,7 -448,8 +448,8 @@@ nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -1007,7 -1008,8 +1008,8 @@@ nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sp
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 37ce31c72ac19d3b9821871450e842e766d4d431,f8add6c515f46f210f9f5e35077fdf2ce3452717..0cb4333284755557664e4937fa0ee44b66a1879f
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -484,7 -484,8 +484,8 @@@ nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_s
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -1101,7 -1102,8 +1102,8 @@@ nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sp
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index ebd45fa7e79351ef67bbe0dbeec9005acb987add,81821a11666da0606eb309d09fb60cc91e67fc1e..a4e5a843659e0ec40c4c2c2003d74af6f14eb3e8
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -235,11 -235,11 +235,11 @@@ nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sp
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* EWALD ELECTROSTATICS */
  
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@@ -552,11 -554,11 +554,11 @@@ nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_spa
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* EWALD ELECTROSTATICS */
  
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 8208eccb87f9f728978cbfcd493ac0d39509f832,38a986a4bf926b361dc7b9c9d172849cf15d78c8..02c004d7975a15bc1d657dd745005449a0f377e3
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -269,11 -269,11 +269,11 @@@ nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sp
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* EWALD ELECTROSTATICS */
  
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@@ -818,11 -820,11 +820,11 @@@ nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_spa
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* EWALD ELECTROSTATICS */
  
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index c51a737d841dfbfbec9abb8b1f70fef4e276e779,1d33ef6baa972515741967ab19510a01b8c51da1..34304fc32dfeda9dc9b1f1d117d3b844078e10a8
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -331,11 -331,11 +331,11 @@@ nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sp
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@@ -1448,11 -1448,11 +1448,11 @@@ nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_spa
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 7eae21170b373d7a86fa65099418a3623f75193a,01312e63524ac4d4cb8a2ad005bef373cf4966ed..d15a704666dbc0ca481693d09cf40c50ed83c7e4
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -262,11 -262,11 +262,11 @@@ nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sp
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* Analytical LJ-PME */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@@ -880,11 -882,11 +882,11 @@@ nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_spa
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* Analytical LJ-PME */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index c4e4135fa95e75c1afb8f762f30003a8add2c83e,52a678aaa6f67416e3c4b633cc61d6f9806e5c87..379f2e65043a0d58216fd1d63203c2cf19364064
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -331,11 -331,11 +331,11 @@@ nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sp
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@@ -1523,11 -1523,11 +1523,11 @@@ nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_spa
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index e03a0ef48dfda338abac7b9902587ff66d73c1fd,4802d316a99bfad6feba6412578301a2cd1e6ff3..be7bb6e5781d120f661c6dba998ba0f47770f304
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -277,7 -277,8 +277,8 @@@ nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_spar
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
@@@ -561,7 -562,8 +562,8 @@@ nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
index 754f61ed6a6fcb2b6927b7e43689feba25a96945,f6819e234c74c17bcd3d98a8529f0856903fd23e..6673a44eb2aec143565a530d82ee90e6fdae1598
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -413,7 -413,8 +413,8 @@@ nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_spar
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
@@@ -909,7 -910,8 +910,8 @@@ nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* EWALD ELECTROSTATICS */
  
index b596997fa0817e66f977e2bf38ccb4c1ff4eda00,380a4a454acf4000c46087fdd7dac3ce5d7fc5f2..5189a947bff21b54142f08a805403015efffa1f3
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -445,7 -445,8 +445,8 @@@ nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_spar
               **************************/
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -995,7 -996,8 +996,8 @@@ nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc
               **************************/
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index 0b371e50b7589b1e435e205809d8defcee417433,d5f3346b02d5b39d7aa49f4e98634a4be213c6ec..141f61f8243dd24fbab18a4ae2883e5cf745bc03
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -329,7 -329,8 +329,8 @@@ nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -716,7 -717,8 +717,8 @@@ nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sp
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index e4740b92f178608862f3397bf0cc48c2c1dfd0f2,2e3e255fbfd7771e457240dd4573a48235663d72..b9c8f053759009a6351bde2d037f8e83989cccfa
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -300,7 -300,8 +300,8 @@@ nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_spar
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
              isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
@@@ -630,7 -631,8 +631,8 @@@ nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
              isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
index 002a7f36d600685929e146c925a80fa3475b2d68,6d1d4705f782cf610217f99d1d37b39385f3e00a..8547907a362f257b2d94dec45ceeda60ecd742e2
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -271,7 -271,8 +271,8 @@@ nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -563,7 -564,8 +564,8 @@@ nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 74146bbff009c34ffa89c426b652181aa9e1a827,f7785709002ef68ee331248f39e49fcc3a3f25f2..c096e05f1b5c9b8e054729de9b82a659e68ce993
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -211,11 -211,11 +211,11 @@@ nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_V
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* Analytical LJ-PME */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
              vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                 _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@@ -505,11 -507,11 +507,11 @@@ nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* Analytical LJ-PME */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 1328f231c3e33cbb7460957297420e21f45d271a,1b354f3543642d05d0ca5b2568e444c1eba19165..6c52337bce5fc4f85462c6990d7098d0ea571091
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -201,11 -201,11 +201,11 @@@ nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* Analytical LJ-PME */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
              /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@@ -466,11 -468,11 +468,11 @@@ nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_s
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
-             c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+             c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                    vdwgridparam+vdwioffset0+vdwjidx0B);
  
              /* Analytical LJ-PME */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index c23fe6ee27764bbc79c8982569dcec5ed3b409f9,e7e5b27696fde1de1b01353f9cade1a44149e837..638ef0abb58e0dbaf43502a65b8901fcf6f91116
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -253,7 -253,8 +253,8 @@@ nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_
              {
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -505,7 -506,8 +506,8 @@@ nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_s
              {
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index 64bf5ad01cce1e72de2b70cd299491710f1089ff,3efb862dacce95249274c51201a5677f998cca62..515cac9eba6b504c96e9e38db526e080c260d8b5
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -282,7 -282,8 +282,8 @@@ nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -576,7 -577,8 +577,8 @@@ nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_s
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index 61c4e883513b8696807350ea04c3d2b81dde0a02,2b3f34e4ae0fd80cc0e8d1cedf30d236a9fcf986..b62a36499ee0a721dd4fe092d81a123019dafc4e
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -232,7 -232,8 +232,8 @@@ nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sp
               **************************/
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -457,7 -458,8 +458,8 @@@ nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_spa
               **************************/
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index 979df716bc0fb9cfd80724c78389182d889bd4ab,c42edbd9665f75146dcc16e8217f90c9e627f177..e4367156769bc86d64093441ed2ccfc876179b6e
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -312,7 -312,8 +312,8 @@@ nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_V
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -655,7 -656,8 +656,8 @@@ nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 3e63f540457656f6bdbad12f6556ff043f852462,018952c15612f45b278aeb951ee06e0510566a6e..c7fa1728ea48d79ae3db01fe3ef14144f163c389
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -434,7 -434,8 +434,8 @@@ nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_V
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -969,7 -970,8 +970,8 @@@ nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index c99ca20f97fb5c3418b7a8392957d7423b34f2f9,82381b12a1932088f9ad21f6e4535a70a9f083a0..495a32a634549e946dfdb35e583d8c7a8c833cd7
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -464,7 -464,8 +464,8 @@@ nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_V
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -1052,7 -1053,8 +1053,8 @@@ nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 1d1a34200d2deaac43bad50f89af180c2fba20fb,00e2e4d8bc8dba65720e763bbfd8fffb52bbf481..95c1bf4946b8f1ac0e2c0549568c93c989e6d72f
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -277,7 -277,8 +277,8 @@@ nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@@ -557,7 -558,8 +558,8 @@@ nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index 7743a69904257ae429b9db1dbbd7752f24ecc2f8,7bd46a31596b07ce5a712062cb78270d8174736b..83148ea8666e9610d92b23dde1b56cc6743a09be
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -399,7 -399,8 +399,8 @@@ nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@@ -871,7 -872,8 +872,8 @@@ nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index 0f9b3421192e80d527a19a199af465b41a302f03,b56a73d8e5bebf5e692880e7d3619d1318c89dc5..1b908815ef9a31953319ae33629a6214516908b7
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -442,7 -442,8 +442,8 @@@ nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF
              {
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -986,7 -987,8 +987,8 @@@ nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_
              {
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index 334a041e6d2355c4e73af6a52d43b4b1d22386d7,eb84028588f27f8a1d6cd51d61b8e8e0511473b0..55b463e8e77b3416154646be5a4d59325287072d
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -302,7 -302,8 +302,8 @@@ nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@@ -620,7 -621,8 +621,8 @@@ nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index cc18c0aa47ccf734638a53a70b3a4420190838b4,2219811420c0d2681105a5af483f950418ae1838..680a0df90edef7d448e5234a6a155cfa10dae838
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -424,7 -424,8 +424,8 @@@ nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@@ -934,7 -935,8 +935,8 @@@ nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index cf9c1edbb31d3e776e5ad0e58996c72ab33a5b9d,8f0dbea164f04e8f90b58fa08cbd27b69621cdc4..ded5f63a03f6b350d108561d473507c060065754
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -469,7 -469,8 +469,8 @@@ nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -1053,7 -1054,8 +1054,8 @@@ nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index 4bedaed7d9fae6e8656583a66cfaf846411aa920,15436673e224561f701bfc097ccadada862fccc9..0b70695a78d7408dcddec8ece6a3c03461f64628
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -293,7 -293,8 +293,8 @@@ nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -611,7 -612,8 +612,8 @@@ nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sp
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index fdcc647dfafd04d3366e775783451e6485d81bde,b0f5066e984ffbfab2521ebe76019478e81e605b..4e13180e65747768d9c6f476b2b740d0548a879c
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -395,7 -395,8 +395,8 @@@ nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_s
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -867,7 -868,8 +868,8 @@@ nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sp
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index c7fa4c6f074ef31e14f5bbc464f1566b02b21d5c,7b2c2b7abda89e422392d818fb453f205f8afc0a..ad8da6d53710d93f5804f7f5a01596e5e4f76c46
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -429,7 -429,8 +429,8 @@@ nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_s
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@@ -955,7 -956,8 +956,8 @@@ nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sp
              r00              = _fjsp_mul_v2r8(rsq00,rinv00);
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* Calculate table index by multiplying r with table scale and truncate to integer */
              rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 7d497dab08ee7fb1c85d2d55b310e97146845308,4588517f7d0dc7aaeed3805a2a5e933d0a711f25..24b01bae5f5ddae297619eb6b283df389c1e0383
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -254,7 -254,8 +254,8 @@@ nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_spar
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@@ -505,7 -506,8 +506,8 @@@ nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index c3524155aa1fd8b0089d1da08d0e0bfe48091a67,295f5be73f17ed0dfa9baee618ceffe2efc0233f..00fcc3d7133c687106cffc8bb1fe54bb1f037610
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -356,7 -356,8 +356,8 @@@ nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_spar
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@@ -761,7 -762,8 +762,8 @@@ nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc
  
              /* Compute parameters for interactions between i and j atoms */
              qq00             = _fjsp_mul_v2r8(iq0,jq0);
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* REACTION-FIELD ELECTROSTATICS */
              felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index e9e38c2b4c881ab2e5ffe1e2490f31458f6ad5bc,93df321d6cc843d5cfe69cbeaf41fd5721534915..7df06226d85858006cee842adb397edc3cadf4dc
  /*
   * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  
@@@ -390,7 -390,8 +390,8 @@@ nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_spar
               **************************/
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
@@@ -849,7 -850,8 +850,8 @@@ nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc
               **************************/
  
              /* Compute parameters for interactions between i and j atoms */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                          vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
  
              /* LENNARD-JONES DISPERSION/REPULSION */
  
index 07fed742c850b5e5ee3064f59953cf7077bc4e75,422e6a20a346ff3c9ec95497698487d9b2b08b7f..2fe3d394922d36a8d08ad29bdd041a94683f9c46
  #error This file must be processed with the Gromacs pre-preprocessor
  /* #endif */
  /* #if INCLUDE_HEADER */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  
  #include "../nb_kernel.h"
 -#include "types/simple.h"
 -#include "gromacs/legacyheaders/vec.h"
 -#include "nrnb.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/legacyheaders/nrnb.h"
  
  #include "kernelutil_sparc64_hpc_ace_double.h"
  /* #endif */
@@@ -543,20 -543,12 +543,12 @@@ voi
              /*             #define INNERFLOPS INNERFLOPS+1 */
              /*         #endif */
              /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
-             /*             #if ROUND == 'Loop' */
              gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset{I}+vdwjidx{J}A,
                                           vdwparam+vdwioffset{I}+vdwjidx{J}B,&c6_{I}{J},&c12_{I}{J});
  
-           /*                 #if 'LJEwald' in KERNEL_VDW */
+           /*             #if 'LJEwald' in KERNEL_VDW */
              c6grid_{I}{J}       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset{I}+vdwjidx{J}A,
                                                                     vdwgridparam+vdwioffset{I}+vdwjidx{J}B);
-             /*                 #endif */
-             /*             #else */
-             gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset{I}+vdwjidx{J}A,&c6_{I}{J},&c12_{I}{J});
-             /*                 #if 'LJEwald' in KERNEL_VDW */
-             c6grid_{I}{J}       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset{I}+vdwjidx{J}A);
-             /*                 #endif */
              /*             #endif */
              /*         #endif */
              /*     #endif */
              rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq{I}{J},rinvsq{I}{J}),rinvsq{I}{J});
              ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq{I}{J});
              ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-             exponent         = gmx_simd_exp_d(-ewcljrsq);
+             exponent         = gmx_simd_exp_d(ewcljrsq);
              /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
              /*                 #define INNERFLOPS INNERFLOPS+9 */
              /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch' */
              /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_{I}{J},_fjsp_sub_v2r8(one,poly),c6_{I}{J}),rinvsix);
+             vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_{I}{J},_fjsp_sub_v2r8(poly,one),c6_{I}{J}),rinvsix);
              vvdw12           = _fjsp_mul_v2r8(c12_{I}{J},_fjsp_mul_v2r8(rinvsix,rinvsix));
              /*                 #define INNERFLOPS INNERFLOPS+5 */
              /*                 #if KERNEL_MOD_VDW=='PotentialShift' */
              /*                  #endif */
              /*              #elif KERNEL_VF=='Force' */
              /* f6A = 6 * C6grid * (1 - poly) */
-             f6A              = _fjsp_mul_v2r8(c6grid_{I}{J},_fjsp_msub_v2r8(one,poly));
+             f6A              = _fjsp_mul_v2r8(c6grid_{I}{J},_fjsp_sub_v2r8(one,poly));
              /* f6B = C6grid * exponent * beta^6 */
              f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_{I}{J},one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
              /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 1dfa19a18eeda552bcc9d15673655f68352c83c8,3d2b2b07cd4e2516d3b6c9e742d3a52531c1fd5d..7fcd21cd9f92c25ea1cd5faafe938c1793414ccd
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <ctype.h>
 -#include <stdlib.h>
  #include <limits.h>
 -#include "sysstuff.h"
 -#include "gromacs/utility/smalloc.h"
 -#include "typedefs.h"
 -#include "physics.h"
 -#include "names.h"
 -#include "gmx_fatal.h"
 -#include "macros.h"
 -#include "index.h"
 -#include "symtab.h"
 +#include <stdlib.h>
 +
 +#include "gromacs/legacyheaders/typedefs.h"
 +#include "gromacs/math/units.h"
 +#include "gromacs/legacyheaders/names.h"
 +#include "gromacs/legacyheaders/macros.h"
 +#include "gromacs/topology/index.h"
  #include "gromacs/utility/cstringutil.h"
 -#include "readinp.h"
 -#include "warninp.h"
 +#include "gromacs/legacyheaders/readinp.h"
 +#include "gromacs/legacyheaders/warninp.h"
  #include "readir.h"
  #include "toputil.h"
 -#include "index.h"
 -#include "network.h"
 -#include "vec.h"
 -#include "pbc.h"
 -#include "mtop_util.h"
 -#include "chargegroup.h"
 -#include "inputrec.h"
 +#include "gromacs/legacyheaders/network.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/pbcutil/pbc.h"
 +#include "gromacs/topology/mtop_util.h"
 +#include "gromacs/legacyheaders/chargegroup.h"
 +#include "gromacs/legacyheaders/inputrec.h"
  #include "calc_verletbuf.h"
  
 +#include "gromacs/topology/block.h"
 +#include "gromacs/topology/symtab.h"
 +#include "gromacs/utility/fatalerror.h"
 +#include "gromacs/utility/smalloc.h"
 +
  #define MAXPTR 254
  #define NOGID  255
  
@@@ -1006,7 -1005,7 +1006,7 @@@ void check_ir(const char *mdparin, t_in
          sprintf(err_buf, "tau-p must be > 0 instead of %g\n", ir->tau_p);
          CHECK(ir->tau_p <= 0);
  
-         if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc))
+         if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc) - 10*GMX_REAL_EPS)
          {
              sprintf(warn_buf, "For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)",
                      EPCOUPLTYPE(ir->epc), ir->tau_p, pcouple_min_integration_steps(ir->epc), dt_pcoupl);
@@@ -1424,7 -1423,7 +1424,7 @@@ int str_nelem(const char *str, int maxp
      int   np = 0;
      char *copy0, *copy;
  
 -    copy0 = strdup(str);
 +    copy0 = gmx_strdup(str);
      copy  = copy0;
      ltrim(copy);
      while (*copy != '\0')
@@@ -1679,7 -1678,7 +1679,7 @@@ static void do_wall_params(t_inputrec *
          }
          for (i = 0; i < ir->nwall; i++)
          {
 -            opts->wall_atomtype[i] = strdup(names[i]);
 +            opts->wall_atomtype[i] = gmx_strdup(names[i]);
          }
  
          if (ir->wall_type == ewt93 || ir->wall_type == ewt104)
@@@ -2342,7 -2341,7 +2342,7 @@@ void get_ir(const char *mdparin, const 
      {
          if (ir->efep != efepNO)
          {
 -            opts->couple_moltype = strdup(is->couple_moltype);
 +            opts->couple_moltype = gmx_strdup(is->couple_moltype);
              if (opts->couple_lam0 == opts->couple_lam1)
              {
                  warning(wi, "The lambda=0 and lambda=1 states for coupling are identical");
@@@ -2934,7 -2933,7 +2934,7 @@@ static void decode_cos(char *s, t_cosin
      double  a, phi;
      int     i;
  
 -    t = strdup(s);
 +    t = gmx_strdup(s);
      trim(t);
  
      cosine->n   = 0;
@@@ -3115,7 -3114,7 +3115,7 @@@ static void make_swap_groups
  
  void make_IMD_group(t_IMD *IMDgroup, char *IMDgname, t_blocka *grps, char **gnames)
  {
 -    int      ig = -1, i;
 +    int      ig, i;
  
  
      ig            = search_string(IMDgname, grps->nr, gnames);
@@@ -3281,7 -3280,7 +3281,7 @@@ void do_index(const char* mdparin, cons
          nstcmin = tcouple_min_integration_steps(ir->etc);
          if (nstcmin > 1)
          {
-             if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
+             if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin - 10*GMX_REAL_EPS)
              {
                  sprintf(warn_buf, "For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
                          ETCOUPLTYPE(ir->etc),
index 7b3fff08a2b346bacd978bd7dd02a5b2cd614948,3cf53cd7d9acba8fe4b519002e3ff7f4f6f53055..9c4da0eb1b8c945e31eb46a52975b60135b32aa9
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
+ #include <assert.h>
  #include <math.h>
  #include <string.h>
  
 -#include "gromacs/utility/smalloc.h"
 -#include "sysstuff.h"
 -#include "macros.h"
 +#include "gromacs/legacyheaders/macros.h"
  #include "topdirs.h"
  #include "toputil.h"
 -#include "symtab.h"
 -#include "gmx_fatal.h"
  #include "gpp_atomtype.h"
  
 +#include "gromacs/topology/block.h"
 +#include "gromacs/topology/symtab.h"
 +#include "gromacs/utility/fatalerror.h"
 +#include "gromacs/utility/smalloc.h"
 +
  /* UTILITIES */
  
  void set_p_string(t_param *p, const char *s)
@@@ -87,12 -87,7 +88,7 @@@ void pr_alloc (int extra, t_params *pr
      {
          return;
      }
-     if ((pr->nr == 0) && (pr->param != NULL))
-     {
-         fprintf(stderr, "Warning: dangling pointer at %lx\n",
-                 (unsigned long)pr->param);
-         pr->param = NULL;
-     }
+     assert(!((pr->nr == 0) && (pr->param != NULL)));
      if (pr->nr+extra > pr->maxnr)
      {
          pr->maxnr = max(1.2*pr->maxnr, pr->maxnr + extra);
diff --combined src/gromacs/simd/simd.h
index f7c73eb7a4a14e74b2edc62bffa43d5005b3e78a,8ef1c8cac4e563c0cef3c46fc571c1d3b20b7691..49ca593f50c3570667a166297099236b5d3622fa
   * \ingroup module_simd
   */
  
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "config.h"
  
  #include <stddef.h>
 -#include "gromacs/legacyheaders/types/simple.h"
 +
 +#include "gromacs/utility/basedefinitions.h"
  
  /* Forward declarations so memory allocation can be used in implementations */
  static gmx_inline float *  gmx_simd_align_f(float *p);
@@@ -125,6 -126,8 +125,8 @@@ static gmx_inline double * gmx_simd4_al
  #    include "gromacs/simd/impl_x86_sse2/impl_x86_sse2.h"
  #elif defined GMX_SIMD_IBM_QPX
  #    include "gromacs/simd/impl_ibm_qpx/impl_ibm_qpx.h"
+ #elif defined GMX_SIMD_SPARC64_HPC_ACE
+ #    include "gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h"
  #elif (defined GMX_SIMD_REFERENCE) || (defined DOXYGEN)
  /* Plain C SIMD reference implementation, also serves as documentation.
   * For now this code path will also be taken for Sparc64_HPC_ACE since we have
index 81121cc3d6385fd5a903e616515fbc3ed1168fac,e3fa9d910684c4075af86fb081d41d96599cb20f..c8429f729f8eeee81fa090dc602ffdec2bb20e68
@@@ -32,9 -32,9 +32,9 @@@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include "simd.h"
  
@@@ -80,21 -80,6 +80,6 @@@ const gmx_simd_real_t rSimd_Exp      = 
  const gmx_simd_real_t rSimd_ExpDouble = setSimdRealFrom3R( 6.287393598732017379054414e+176,
                                                             8.794495252903116023030553e-140,
                                                             -3.637060701570496477655022e+202);
- // Magic FP numbers corresponding to specific bit patterns
- const gmx_simd_real_t rSimd_Bits1    = setSimdRealFrom1R(-1.07730874267432137e+236);
- const gmx_simd_real_t rSimd_Bits2    = setSimdRealFrom1R(-9.25596313493178307e+061);
- const gmx_simd_real_t rSimd_Bits3    = setSimdRealFrom1R(-8.57750588235293981e+003);
- const gmx_simd_real_t rSimd_Bits4    = setSimdRealFrom1R( 1.22416778341839096e-250);
- const gmx_simd_real_t rSimd_Bits5    = setSimdRealFrom1R(-1.15711777004554095e+294);
- const gmx_simd_real_t rSimd_Bits6    = setSimdRealFrom1R( 1.53063836115600621e-018);
- #    else
- // Magic FP numbers corresponding to specific bit patterns
- const gmx_simd_real_t rSimd_Bits1    = setSimdRealFrom1R(-5.9654142337e+29);
- const gmx_simd_real_t rSimd_Bits2    = setSimdRealFrom1R(-1.0737417600e+08);
- const gmx_simd_real_t rSimd_Bits3    = setSimdRealFrom1R(-6.0235290527e+00);
- const gmx_simd_real_t rSimd_Bits4    = setSimdRealFrom1R( 1.0788832913e-31);
- const gmx_simd_real_t rSimd_Bits5    = setSimdRealFrom1R(-1.0508719529e+37);
- const gmx_simd_real_t rSimd_Bits6    = setSimdRealFrom1R( 1.1488970369e-02);
  #    endif
  #endif  // GMX_SIMD_HAVE_REAL
  #ifdef GMX_SIMD_HAVE_INT32
index a2d51c16f273daced0bfb44a29c04420f72682a0,f8d50ae877da6699cecab53aaa9f5ab2f02a91ad..79ff7783d87e0a6557ddccc062d0c82cf4355aec
@@@ -32,9 -32,9 +32,9 @@@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include "simd4.h"
  
@@@ -68,20 -68,6 +68,6 @@@ const gmx_simd4_real_t rSimd4_Exp      
  const gmx_simd4_real_t  rSimd_ExpDouble = setSimd4RealFrom3R( 6.287393598732017379054414e+176,
                                                                8.794495252903116023030553e-140,
                                                                -3.637060701570496477655022e+202);
- // Magic FP numbers corresponding to specific bit patterns
- const gmx_simd4_real_t rSimd4_Bits1    = setSimd4RealFrom1R(-1.07730874267432137e+236);
- const gmx_simd4_real_t rSimd4_Bits2    = setSimd4RealFrom1R(-9.25596313493178307e+061);
- const gmx_simd4_real_t rSimd4_Bits3    = setSimd4RealFrom1R(-8.57750588235293981e+003);
- const gmx_simd4_real_t rSimd4_Bits4    = setSimd4RealFrom1R( 1.22416778341839096e-250);
- const gmx_simd4_real_t rSimd4_Bits5    = setSimd4RealFrom1R(-1.15711777004554095e+294);
- const gmx_simd4_real_t rSimd4_Bits6    = setSimd4RealFrom1R( 1.53063836115600621e-018);
- #    else
- const gmx_simd4_real_t rSimd4_Bits1    = setSimd4RealFrom1R(-5.9654142337e+29);
- const gmx_simd4_real_t rSimd4_Bits2    = setSimd4RealFrom1R(-1.0737417600e+08);
- const gmx_simd4_real_t rSimd4_Bits3    = setSimd4RealFrom1R(-6.0235290527e+00);
- const gmx_simd4_real_t rSimd4_Bits4    = setSimd4RealFrom1R( 1.0788832913e-31);
- const gmx_simd4_real_t rSimd4_Bits5    = setSimd4RealFrom1R(-1.0508719529e+37);
- const gmx_simd4_real_t rSimd4_Bits6    = setSimd4RealFrom1R( 1.1488970369e-02);
  #    endif
  
  ::std::vector<real>
index cde24e81af458c52e656e18a2b5f68a8f2b5c328,57bfe856516bb0d03597aa4b5f8c23027f0252af..5d7a92251e589bd64cdb7da83d36c14fb51f45e1
@@@ -32,9 -32,9 +32,9 @@@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  #include "gromacs/math/utilities.h"
@@@ -121,25 -121,47 +121,47 @@@ TEST_F(Simd4FloatingpointTest, gmxSimd4
  }
  
  #ifdef GMX_SIMD4_HAVE_LOGICAL
+ /* 1.3333282470703125 has mantissa 0101010101010101 (followed by zeros)
+  * 1.79998779296875   has mantissa 1100110011001100 (followed by zeros)
+  * 1.26666259765625   has mantissa 0100010001000100 (followed by zeros)
+  * 1.8666534423828125 has mantissa 1101110111011101 (followed by zeros)
+  *
+  * Since all of them have the same exponent (2^0), the exponent will
+  * not change with AND or OR operations.
+  */
  TEST_F(Simd4FloatingpointTest, gmxSimd4AndR)
  {
-     GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits3, gmx_simd4_and_r(rSimd4_Bits1, rSimd4_Bits2)); // Bits1 & Bits2 = Bits3
+     GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom1R(1.26666259765625),
+                              gmx_simd4_and_r(gmx_simd4_set1_r(1.3333282470703125),
+                                              gmx_simd4_set1_r(1.79998779296875)));
  }
  
- TEST_F(Simd4FloatingpointTest, gmxSimd4AndnotR)
+ TEST_F(Simd4FloatingpointTest, gmxSimd4OrR)
  {
-     GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits4, gmx_simd4_andnot_r(rSimd4_Bits1, rSimd4_Bits2)); // (~Bits1) & Bits2 = Bits3
+     GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom1R(1.8666534423828125),
+                              gmx_simd4_or_r(gmx_simd4_set1_r(1.3333282470703125),
+                                             gmx_simd4_set1_r(1.79998779296875)));
  }
  
- TEST_F(Simd4FloatingpointTest, gmxSimd4OrR)
+ TEST_F(Simd4FloatingpointTest, gmxSimd4XorR)
  {
-     GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits5, gmx_simd4_or_r(rSimd4_Bits1, rSimd4_Bits2)); // Bits1 | Bits2 = Bits3
+     /* Test xor by taking xor with a number and its negative. This should result
+      * in only the sign bit being set. We then use this bit to change the sign of
+      * different numbers.
+      */
+     gmx_simd4_real_t signbit = gmx_simd4_xor_r(gmx_simd4_set1_r(1.5), gmx_simd4_set1_r(-1.5));
+     GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom3R(-1, 2, -3), gmx_simd4_xor_r(signbit, setSimd4RealFrom3R(1, -2, 3)));
  }
  
- TEST_F(Simd4FloatingpointTest, gmxSimd4XorR)
+ TEST_F(Simd4FloatingpointTest, gmxSimd4AndnotR)
  {
-     GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits6, gmx_simd4_xor_r(rSimd4_Bits1, rSimd4_Bits2)); // Bits1 ^ Bits2 = Bits3
+     /* Use xor (which we already tested, so fix that first if both tests fail)
+      * to extract the sign bit, and then use andnot to take absolute values.
+      */
+     gmx_simd4_real_t signbit = gmx_simd4_xor_r(gmx_simd4_set1_r(1.5), gmx_simd4_set1_r(-1.5));
+     GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom3R(1, 2, 3), gmx_simd4_andnot_r(signbit, setSimd4RealFrom3R(-1, 2, -3)));
  }
  #endif
  
  TEST_F(Simd4FloatingpointTest, gmxSimd4MaxR)
index 5be60b44b66894f873a410298206f2a56400cb14,44bd447305e8dd947d374ce285fc272bbd1f9012..78584b72c9b1f7cac495079122adb74cc7b0225e
@@@ -32,9 -32,9 +32,9 @@@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "config.h"
  
  #include <math.h>
  #include "gromacs/math/utilities.h"
@@@ -128,25 -128,47 +128,47 @@@ TEST_F(SimdFloatingpointTest, gmxSimdFn
  }
  
  #ifdef GMX_SIMD_HAVE_LOGICAL
+ /* 1.3333282470703125 has mantissa 0101010101010101 (followed by zeros)
+  * 1.79998779296875   has mantissa 1100110011001100 (followed by zeros)
+  * 1.26666259765625   has mantissa 0100010001000100 (followed by zeros)
+  * 1.8666534423828125 has mantissa 1101110111011101 (followed by zeros)
+  *
+  * Since all of them have the same exponent (2^0), the exponent will
+  * not change with AND or OR operations.
+  */
  TEST_F(SimdFloatingpointTest, gmxSimdAndR)
  {
-     GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits3, gmx_simd_and_r(rSimd_Bits1, rSimd_Bits2)); // Bits1 & Bits2 = Bits3
+     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom1R(1.26666259765625),
+                             gmx_simd_and_r(gmx_simd_set1_r(1.3333282470703125),
+                                            gmx_simd_set1_r(1.79998779296875)));
  }
  
- TEST_F(SimdFloatingpointTest, gmxSimdAndnotR)
+ TEST_F(SimdFloatingpointTest, gmxSimdOrR)
  {
-     GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits4, gmx_simd_andnot_r(rSimd_Bits1, rSimd_Bits2)); // (~Bits1) & Bits2 = Bits3
+     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom1R(1.8666534423828125),
+                             gmx_simd_or_r(gmx_simd_set1_r(1.3333282470703125),
+                                           gmx_simd_set1_r(1.79998779296875)));
  }
  
- TEST_F(SimdFloatingpointTest, gmxSimdOrR)
+ TEST_F(SimdFloatingpointTest, gmxSimdXorR)
  {
-     GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits5, gmx_simd_or_r(rSimd_Bits1, rSimd_Bits2)); // Bits1 | Bits2 = Bits3
+     /* Test xor by taking xor with a number and its negative. This should result
+      * in only the sign bit being set. We then use this bit to change the sign of
+      * different numbers.
+      */
+     gmx_simd_real_t signbit = gmx_simd_xor_r(gmx_simd_set1_r(1.5), gmx_simd_set1_r(-1.5));
+     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-1, 2, -3), gmx_simd_xor_r(signbit, setSimdRealFrom3R(1, -2, 3)));
  }
  
- TEST_F(SimdFloatingpointTest, gmxSimdXorR)
+ TEST_F(SimdFloatingpointTest, gmxSimdAndnotR)
  {
-     GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits6, gmx_simd_xor_r(rSimd_Bits1, rSimd_Bits2)); // Bits1 ^ Bits2 = Bits3
+     /* Use xor (which we already tested, so fix that first if both tests fail)
+      * to extract the sign bit, and then use andnot to take absolute values.
+      */
+     gmx_simd_real_t signbit = gmx_simd_xor_r(gmx_simd_set1_r(1.5), gmx_simd_set1_r(-1.5));
+     GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(1, 2, 3), gmx_simd_andnot_r(signbit, setSimdRealFrom3R(-1, 2, -3)));
  }
  #endif
  
  TEST_F(SimdFloatingpointTest, gmxSimdMaxR)
index 8c7bffbf542b77c827a515ccba1a9522cce6d95e,0000000000000000000000000000000000000000..64ef078feb8833726b0492fb183612e2006d6526
mode 100644,000000..100644
--- /dev/null
@@@ -1,277 -1,0 +1,277 @@@
- #if defined(HAVE_UNISTD_H) && !defined(__native_client__)
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2013,2014, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#include "gmxpre.h"
 +
 +#include "basenetwork.h"
 +
 +#include "config.h"
 +
 +#include <cctype>
 +#include <cstdio>
 +#include <cstdlib>
 +#include <cstring>
 +
 +#include <algorithm>
 +#include <exception>
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#include "gromacs/utility/cstringutil.h"
 +#include "gromacs/utility/fatalerror.h"
 +#include "gromacs/utility/gmxmpi.h"
 +#include "gromacs/utility/programcontext.h"
 +
 +int gmx_gethostname(char *name, size_t len)
 +{
 +    if (len < 8)
 +    {
 +        gmx_incons("gmx_gethostname called with len<8");
 +    }
++#if defined(HAVE_UNISTD_H) && !defined(__native_client__) && !defined(__MINGW32__)
 +    if (gethostname(name, len-1) != 0)
 +    {
 +        std::strncpy(name, "unknown", 8);
 +        return -1;
 +    }
 +    return 0;
 +#else
 +    std::strncpy(name, "unknown", 8);
 +    return -1;
 +#endif
 +}
 +
 +gmx_bool gmx_mpi_initialized(void)
 +{
 +#ifndef GMX_MPI
 +    return 0;
 +#else
 +    int n;
 +    MPI_Initialized(&n);
 +
 +    return n;
 +#endif
 +}
 +
 +int gmx_node_num(void)
 +{
 +#ifndef GMX_MPI
 +    return 1;
 +#else
 +#ifdef GMX_THREAD_MPI
 +    if (!gmx_mpi_initialized())
 +    {
 +        return 1;
 +    }
 +#endif
 +    int i;
 +    (void) MPI_Comm_size(MPI_COMM_WORLD, &i);
 +    return i;
 +#endif
 +}
 +
 +int gmx_node_rank(void)
 +{
 +#ifndef GMX_MPI
 +    return 0;
 +#else
 +#ifdef GMX_THREAD_MPI
 +    if (!gmx_mpi_initialized())
 +    {
 +        return 0;
 +    }
 +#endif
 +    int i;
 +    (void) MPI_Comm_rank(MPI_COMM_WORLD, &i);
 +    return i;
 +#endif
 +}
 +
 +static int mpi_hostname_hash(void)
 +{
 +    int hash_int;
 +
 +#ifndef GMX_LIB_MPI
 +    /* We have a single physical node */
 +    hash_int = 0;
 +#else
 +    int  resultlen;
 +    char mpi_hostname[MPI_MAX_PROCESSOR_NAME];
 +
 +    /* This procedure can only differentiate nodes with different names.
 +     * Architectures where different physical nodes have identical names,
 +     * such as IBM Blue Gene, should use an architecture specific solution.
 +     */
 +    MPI_Get_processor_name(mpi_hostname, &resultlen);
 +
 +    /* The string hash function returns an unsigned int. We cast to an int.
 +     * Negative numbers are converted to positive by setting the sign bit to 0.
 +     * This makes the hash one bit smaller.
 +     * A 63-bit hash (with 64-bit int) should be enough for unique node hashes,
 +     * even on a million node machine. 31 bits might not be enough though!
 +     */
 +    hash_int =
 +        (int)gmx_string_fullhash_func(mpi_hostname, gmx_string_hash_init);
 +    if (hash_int < 0)
 +    {
 +        hash_int -= INT_MIN;
 +    }
 +#endif
 +
 +    return hash_int;
 +}
 +
 +#if defined GMX_LIB_MPI && defined GMX_TARGET_BGQ
 +#ifdef __clang__
 +/* IBM's declaration of this function in
 + * /bgsys/drivers/V1R2M2/ppc64/spi/include/kernel/process.h
 + * erroneously fails to specify __INLINE__, despite
 + * /bgsys/drivers/V1R2M2/ppc64/spi/include/kernel/cnk/process_impl.h
 + * specifying __INLINE__, so bgclang thinks they are different enough
 + * to complain about. */
 +static uint64_t Kernel_GetJobID();
 +#endif
 +#include <spi/include/kernel/location.h>
 +
 +static int bgq_nodenum(void)
 +{
 +    int           hostnum;
 +    Personality_t personality;
 +    Kernel_GetPersonality(&personality, sizeof(personality));
 +    /* Each MPI rank has a unique coordinate in a 6-dimensional space
 +       (A,B,C,D,E,T), with dimensions A-E corresponding to different
 +       physical nodes, and T within each node. Each node has sixteen
 +       physical cores, each of which can have up to four hardware
 +       threads, so 0 <= T <= 63 (but the maximum value of T depends on
 +       the configuration of ranks and OpenMP threads per
 +       node). However, T is irrelevant for computing a suitable return
 +       value for gmx_physicalnode_id_hash().
 +     */
 +    hostnum  = personality.Network_Config.Acoord;
 +    hostnum *= personality.Network_Config.Bnodes;
 +    hostnum += personality.Network_Config.Bcoord;
 +    hostnum *= personality.Network_Config.Cnodes;
 +    hostnum += personality.Network_Config.Ccoord;
 +    hostnum *= personality.Network_Config.Dnodes;
 +    hostnum += personality.Network_Config.Dcoord;
 +    hostnum *= personality.Network_Config.Enodes;
 +    hostnum += personality.Network_Config.Ecoord;
 +
 +    if (debug)
 +    {
 +        std::fprintf(debug,
 +                     "Torus ID A: %d / %d B: %d / %d C: %d / %d D: %d / %d E: %d / %d\n"
 +                     "Node ID T: %d / %d core: %d / %d hardware thread: %d / %d\n",
 +                     personality.Network_Config.Acoord,
 +                     personality.Network_Config.Anodes,
 +                     personality.Network_Config.Bcoord,
 +                     personality.Network_Config.Bnodes,
 +                     personality.Network_Config.Ccoord,
 +                     personality.Network_Config.Cnodes,
 +                     personality.Network_Config.Dcoord,
 +                     personality.Network_Config.Dnodes,
 +                     personality.Network_Config.Ecoord,
 +                     personality.Network_Config.Enodes,
 +                     Kernel_ProcessorCoreID(),
 +                     16,
 +                     Kernel_ProcessorID(),
 +                     64,
 +                     Kernel_ProcessorThreadID(),
 +                     4);
 +    }
 +    return hostnum;
 +}
 +#endif
 +
 +int gmx_physicalnode_id_hash(void)
 +{
 +    int hash;
 +
 +#ifndef GMX_MPI
 +    hash = 0;
 +#else
 +#ifdef GMX_THREAD_MPI
 +    /* thread-MPI currently puts the thread number in the process name,
 +     * we might want to change this, as this is inconsistent with what
 +     * most MPI implementations would do when running on a single node.
 +     */
 +    hash = 0;
 +#else
 +#ifdef GMX_TARGET_BGQ
 +    hash = bgq_nodenum();
 +#else
 +    hash = mpi_hostname_hash();
 +#endif
 +#endif
 +#endif
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "In gmx_physicalnode_id_hash: hash %d\n", hash);
 +    }
 +
 +    return hash;
 +}
 +
 +#ifdef GMX_LIB_MPI
 +void gmx_abort(int errorno)
 +{
 +    const char *programName = "GROMACS";
 +    try
 +    {
 +        programName = gmx::getProgramContext().displayName();
 +    }
 +    catch (const std::exception &)
 +    {
 +    }
 +    const int nnodes   = gmx_node_num();
 +    const int noderank = gmx_node_rank();
 +    if (nnodes > 1)
 +    {
 +        std::fprintf(stderr, "Halting parallel program %s on rank %d out of %d\n",
 +                     programName, noderank, nnodes);
 +    }
 +    else
 +    {
 +        std::fprintf(stderr, "Halting program %s\n", programName);
 +    }
 +
 +    MPI_Abort(MPI_COMM_WORLD, errorno);
 +    std::exit(errorno);
 +}
 +#endif
index 0733dc377197a191a661f1101b150d0739f45707,d71419f70b3261596501c527be227a97a2eb6b87..0768c5086410cfe35b4caf93f71c319210519316
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /* This file is completely threadsafe - keep it that way! */
 +#include "gmxpre.h"
 +
  #include "cstringutil.h"
  
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "config.h"
  
  #include <assert.h>
  #include <ctype.h>
  #include <unistd.h>
  #endif
  
 -#include "gromacs/legacyheaders/types/simple.h"
 -#include "gromacs/legacyheaders/gmx_fatal.h"
 -#include "gromacs/legacyheaders/main.h"
 -
 -#include "gromacs/fileio/futil.h"
 +#include "gromacs/utility/basedefinitions.h"
 +#include "gromacs/utility/basenetwork.h"
 +#include "gromacs/utility/fatalerror.h"
 +#include "gromacs/utility/futil.h"
  #include "gromacs/utility/smalloc.h"
  
  int continuing(char *s)
@@@ -192,20 -193,22 +192,22 @@@ void trim (char *str
  char *
  gmx_ctime_r(const time_t *clock, char *buf, int n)
  {
-     char tmpbuf[STRLEN];
- #ifdef GMX_NATIVE_WINDOWS
+ #ifdef _MSC_VER
      /* Windows */
-     ctime_s( tmpbuf, STRLEN, clock );
+     ctime_s( buf, n, clock );
+ #elif defined(GMX_NATIVE_WINDOWS)
+     char *tmpbuf = ctime( clock );
+     strncpy(buf, tmpbuf, n-1);
+     buf[n-1] = '\0';
  #elif (defined(__sun))
      /*Solaris*/
-     ctime_r(clock, tmpbuf, n);
+     ctime_r(clock, buf, n);
  #else
+     char tmpbuf[STRLEN];
      ctime_r(clock, tmpbuf);
- #endif
      strncpy(buf, tmpbuf, n-1);
      buf[n-1] = '\0';
+ #endif
      return buf;
  }
  
@@@ -213,7 -216,7 +215,7 @@@ void nice_header (FILE *out, const cha
  {
      const char    *unk = "onbekend";
      time_t         clock;
 -    const char    *user = unk;
 +    const char    *user;
      int            gh;
  #ifdef HAVE_PWD_H
      uid_t          uid;
      /* pw returns null on error (e.g. compute nodes lack /etc/passwd) */
      user = pw ? pw->pw_name : unk;
  #else
 -    uid = 0;
 -    gh  = -1;
 +    uid  = 0;
 +    gh   = -1;
 +    user = unk;
  #endif
  
      gmx_ctime_r(&clock, timebuf, STRLEN);
@@@ -587,12 -589,6 +589,12 @@@ str_to_int64_t(const char *str, char **
  #endif
  }
  
 +char *gmx_step_str(gmx_int64_t i, char *buf)
 +{
 +    sprintf(buf, "%"GMX_PRId64, i);
 +    return buf;
 +}
 +
  void parse_digits_from_plain_string(const char *digitstring, int *ndigits, int **digitlist)
  {
      int i;
index e7d840f355dcdb6aacbc289e403182b943b0643f,a93bf6f1f0a6a55ddda9693fb51e426c874d15a1..af28843ef532da32ed1df43370144d54af280627
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 +#include "gmxpre.h"
 +
 +#include "gromacs/utility/futil.h"
 +
  #include "config.h"
 -#endif
  
  #include <stdio.h>
  #include <stdlib.h>
@@@ -58,6 -56,7 +58,7 @@@
  #endif
  
  #ifdef GMX_NATIVE_WINDOWS
+ #include <windows.h>
  #include <direct.h>
  #include <io.h>
  #endif
  
  #include "thread_mpi/threads.h"
  
 -#include "gromacs/legacyheaders/gmx_fatal.h"
 -#include "gromacs/legacyheaders/types/commrec.h"
 -#include "gromacs/legacyheaders/network.h"
 -
 -#include "gromacs/fileio/futil.h"
 -#include "gromacs/fileio/path.h"
  #include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/exceptions.h"
 +#include "gromacs/utility/fatalerror.h"
 +#include "gromacs/utility/path.h"
  #include "gromacs/utility/programcontext.h"
  #include "gromacs/utility/smalloc.h"
  #include "gromacs/utility/stringutil.h"
@@@ -87,15 -90,15 +88,15 @@@ typedef struct t_pstack 
  } t_pstack;
  
  static t_pstack    *pstack      = NULL;
 -static gmx_bool     bUnbuffered = FALSE;
 +static bool         bUnbuffered = false;
  
  /* this linked list is an intrinsically globally shared object, so we have
     to protect it with mutexes */
  static tMPI_Thread_mutex_t pstack_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
  
 -void no_buffers(void)
 +void gmx_disable_file_buffering(void)
  {
 -    bUnbuffered = TRUE;
 +    bUnbuffered = true;
  }
  
  void push_ps(FILE *fp)
@@@ -196,6 -199,10 +197,6 @@@ int gmx_ffclose(FILE *fp
  }
  
  
 -#ifdef rewind
 -#undef rewind
 -#endif
 -
  void frewind(FILE *fp)
  {
      tMPI_Thread_mutex_lock(&pstack_mutex);
@@@ -234,13 -241,37 +235,17 @@@ gmx_off_t gmx_ftell(FILE *stream
      return ftello(stream);
  #else
  #ifdef HAVE__FSEEKI64
+ #ifndef __MINGW32__
      return _ftelli64(stream);
+ #else
+     return ftello64(stream);
+ #endif
  #else
      return ftell(stream);
  #endif
  #endif
  }
  
 -
 -gmx_bool is_pipe(FILE *fp)
 -{
 -    tMPI_Thread_mutex_lock(&pstack_mutex);
 -
 -    t_pstack *ps = pstack;
 -    while (ps != NULL)
 -    {
 -        if (ps->fp == fp)
 -        {
 -            tMPI_Thread_mutex_unlock(&pstack_mutex);
 -            return TRUE;
 -        }
 -        ps = ps->prev;
 -    }
 -    tMPI_Thread_mutex_unlock(&pstack_mutex);
 -    return FALSE;
 -}
 -
 -
  static FILE *uncompress(const char *fn, const char *mode)
  {
      FILE *fp;
@@@ -299,6 -330,41 +304,6 @@@ gmx_bool gmx_fexist(const char *fname
      }
  }
  
 -
 -gmx_bool gmx_fexist_master(const char *fname, t_commrec *cr)
 -{
 -    gmx_bool bExist;
 -
 -    if (SIMMASTER(cr))
 -    {
 -        bExist = gmx_fexist(fname);
 -    }
 -    if (PAR(cr))
 -    {
 -        gmx_bcast(sizeof(bExist), &bExist, cr);
 -    }
 -    return bExist;
 -}
 -
 -gmx_bool gmx_eof(FILE *fp)
 -{
 -    char     data[4];
 -    gmx_bool beof;
 -
 -    if (is_pipe(fp))
 -    {
 -        return feof(fp);
 -    }
 -    else
 -    {
 -        if ((beof = fread(data, 1, 1, fp)) == 1)
 -        {
 -            gmx_fseek(fp, -1, SEEK_CUR);
 -        }
 -        return !beof;
 -    }
 -}
 -
  static char *backup_fn(const char *file, int count_max)
  {
      /* Use a reasonably low value for countmax; we might
@@@ -487,12 -553,12 +492,12 @@@ FILE *gmx_ffopen(const char *file, cons
  /* Our own implementation of dirent-like functionality to scan directories. */
  struct gmx_directory
  {
- #ifdef HAVE_DIRENT_H
-     DIR  *               dirent_handle;
- #elif (defined GMX_NATIVE_WINDOWS)
+ #if defined(GMX_NATIVE_WINDOWS)
      intptr_t             windows_handle;
      struct _finddata_t   finddata;
      int                  first;
+ #elif defined(HAVE_DIRENT_H)
+     DIR  *               dirent_handle;
  #else
      int                  dummy;
  #endif
@@@ -509,19 -575,7 +514,7 @@@ gmx_directory_open(gmx_directory_t *p_g
  
      *p_gmxdir = gmxdir;
  
- #ifdef HAVE_DIRENT_H
-     if ( (gmxdir->dirent_handle = opendir(dirname)) != NULL)
-     {
-         rc = 0;
-     }
-     else
-     {
-         sfree(gmxdir);
-         *p_gmxdir = NULL;
-         rc        = EINVAL;
-     }
- #elif (defined GMX_NATIVE_WINDOWS)
+ #if defined(GMX_NATIVE_WINDOWS)
      if (dirname != NULL && strlen(dirname) > 0)
      {
          char *     tmpname;
      {
          rc = EINVAL;
      }
+ #elif defined(HAVE_DIRENT_H)
+     if ( (gmxdir->dirent_handle = opendir(dirname)) != NULL)
+     {
+         rc = 0;
+     }
+     else
+     {
+         sfree(gmxdir);
+         *p_gmxdir = NULL;
+         rc        = EINVAL;
+     }
  #else
      gmx_fatal(FARGS,
                "Source compiled without POSIX dirent or windows support - cannot scan directories.\n"
@@@ -581,8 -646,41 +585,41 @@@ gmx_directory_nextfile(gmx_directory_t 
  {
      int                     rc;
  
- #ifdef HAVE_DIRENT_H
+ #if defined(GMX_NATIVE_WINDOWS)
+     if (gmxdir != NULL)
+     {
+         if (gmxdir->windows_handle <= 0)
+         {
  
+             name[0] = '\0';
+             rc      = ENOENT;
+         }
+         else if (gmxdir->first == 1)
+         {
+             strncpy(name, gmxdir->finddata.name, maxlength_name);
+             rc            = 0;
+             gmxdir->first = 0;
+         }
+         else
+         {
+             if (_findnext(gmxdir->windows_handle, &gmxdir->finddata) == 0)
+             {
+                 strncpy(name, gmxdir->finddata.name, maxlength_name);
+                 rc      = 0;
+             }
+             else
+             {
+                 name[0] = '\0';
+                 rc      = ENOENT;
+             }
+         }
+     }
+     else
+     {
+         name[0] = '\0';
+         rc      = EINVAL;
+     }
+ #elif defined(HAVE_DIRENT_H)
      struct dirent *         direntp_large;
      struct dirent *         p;
  
          name[0] = '\0';
          rc      = EINVAL;
      }
- #elif (defined GMX_NATIVE_WINDOWS)
-     if (gmxdir != NULL)
-     {
-         if (gmxdir->windows_handle <= 0)
-         {
-             name[0] = '\0';
-             rc      = ENOENT;
-         }
-         else if (gmxdir->first == 1)
-         {
-             strncpy(name, gmxdir->finddata.name, maxlength_name);
-             rc            = 0;
-             gmxdir->first = 0;
-         }
-         else
-         {
-             if (_findnext(gmxdir->windows_handle, &gmxdir->finddata) == 0)
-             {
-                 strncpy(name, gmxdir->finddata.name, maxlength_name);
-                 rc      = 0;
-             }
-             else
-             {
-                 name[0] = '\0';
-                 rc      = ENOENT;
-             }
-         }
-     }
  #else
      gmx_fatal(FARGS,
                "Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
@@@ -658,10 -724,10 +663,10 @@@ in
  gmx_directory_close(gmx_directory_t gmxdir)
  {
      int                     rc;
- #ifdef HAVE_DIRENT_H
-     rc = (gmxdir != NULL) ? closedir(gmxdir->dirent_handle) : EINVAL;
- #elif (defined GMX_NATIVE_WINDOWS)
+ #if defined(GMX_NATIVE_WINDOWS)
      rc = (gmxdir != NULL) ? _findclose(gmxdir->windows_handle) : EINVAL;
+ #elif defined(HAVE_DIRENT_H)
+     rc = (gmxdir != NULL) ? closedir(gmxdir->dirent_handle) : EINVAL;
  #else
      gmx_fatal(FARGS,
                "Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
@@@ -804,6 -870,28 +809,6 @@@ void gmx_tmpnam(char *buf
      /* name in Buf should now be OK */
  }
  
 -int gmx_truncatefile(char *path, gmx_off_t length)
 -{   /* Sets the size of the file named by path to length bytes. */
 -#ifdef GMX_NATIVE_WINDOWS
 -    /* Microsoft visual studio does not have "truncate", so the file is
 -     * opened with CreateFile(), the file pointer is moved to the requested
 -     * length and the end of file is set at that position.
 -     * NOTE(review): none of the Win32 return values are checked here, so
 -     * this branch returns 0 even when the file could not be opened or
 -     * resized. */
 -    HANDLE        fh;
 -    LARGE_INTEGER win_length;
 -
 -    win_length.QuadPart = length;
 -
 -    fh = CreateFile(path, GENERIC_READ | GENERIC_WRITE, 0, NULL,
 -                    OPEN_EXISTING, 0, NULL);
 -    SetFilePointerEx(fh, win_length, NULL, FILE_BEGIN);
 -    SetEndOfFile(fh);
 -    CloseHandle(fh);
 -
 -    return 0;
 -#else
 -    return truncate(path, length); /* result of truncate() is passed through unchanged */
 -#endif
 -}
 -
 -
  int gmx_file_rename(const char *oldname, const char *newname)
  {
  #ifndef GMX_NATIVE_WINDOWS
@@@ -906,15 -994,13 +911,15 @@@ int gmx_fsync(FILE *fp
      rc = fah_fsync(fp);
  #else /* GMX_FAHCORE */
      {
 -        int fn = -1;
 +        int fn;
  
          /* get the file number */
  #if defined(HAVE_FILENO)
          fn = fileno(fp);
  #elif defined(HAVE__FILENO)
          fn = _fileno(fp);
 +#else
 +        fn = -1;
  #endif
  
          /* do the actual fsync */
index 54a873e9feb098e56fcabe8ece1c869aead8eb67,d70c1efd672e75731eea3794a322785d0d04ee3e..ae67ee18519c711affefb2b4ceceafb8d2bcc3b5
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
 - * Copyright (c) 2012, by the GROMACS development team, led by
 + * Copyright (c) 2012,2014, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
   * \brief
   * Include file for configuration macros that affect installed headers.
   *
 - * This include file (or rather, one that it includes) will configured by CMake
 - * and installed with GROMACS header files so that they can refer to a central
 - * location for \#defines that will be available for builds of projects that
 - * depend on GROMACS.
 - *
 - * The actual defines are in gmx_header_config_gen.h to allow usage of relative
 - * include paths before installation.
 + * This include file will be installed with GROMACS header files so that they
 + * can refer to a central location for \#defines that will be available for
 + * builds of projects that depend on GROMACS.
   *
   * \todo
 - * It would be better to have the defines here such that they are not generated
 - * from CMake, but instead detected using \#ifdefs (possible for some of the
 - * macros currently used).
 - * Even better would be to not have these defines at all.
 + * It would be better to not have these defines at all in installed headers.
   *
   * \inlibraryapi
   * \ingroup module_utility
   */
 -#include "gmx_header_config_gen.h"
 +
- /* We currently don't support MingW. And ICC also defines it */
- #ifdef _MSC_VER
++#if defined( _WIN32 ) || defined( _WIN64 )
 +#define GMX_NATIVE_WINDOWS
 +#endif
index 292b1cd198543d38cd9fb4f49757186dfb23e024,276aee8c13540b3055289c1e6d79fc14b9f0d0e4..f0e67a593f4af86bfbcfe62cf2d5f5c821aa0a52
@@@ -49,9 -49,9 +49,9 @@@
  #ifndef GMX_UTILITY_OMP_H
  #define GMX_UTILITY_OMP_H
  
 -#ifdef HAVE_CONFIG_H
  #include "config.h"
 -#endif
 +
 +#include <stdio.h>
  
  #ifndef GMX_NATIVE_WINDOWS
  /* Ugly hack because the openmp implementation below hacks into the SIMD
@@@ -67,7 -67,8 +67,7 @@@
  #include <windows.h>
  #endif
  
 -#include "types/commrec.h"
 -#include "mdrun.h"
 +#include "basedefinitions.h"
  
  #ifdef __cplusplus
  extern "C"
@@@ -112,45 -113,23 +112,45 @@@ void gmx_omp_set_num_threads(int num_th
  /*! \brief
   * Check for externally set thread affinity to avoid conflicts with \Gromacs
   * internal setting.
 + *
 + * \param[out] message  Receives the message to be shown to the user.
 + * \returns `true` if we can set thread affinity ourselves.
 + *
 + * While GNU OpenMP does not set affinity by default, the Intel OpenMP library
 + * does.  This conflicts with the internal affinity (especially thread-MPI)
 + * setting, results in incorrectly locked threads, and causes dreadful performance.
 + *
 + * The KMP_AFFINITY environment variable is used by Intel, GOMP_CPU_AFFINITY
 + * by the GNU compilers (Intel honors it as well).  If any of the variables
 + * is set, we should honor it and disable the internal pinning.
 + * When using Intel OpenMP, we will disable affinity if the user did not set it
 + * manually through one of the aforementioned environment variables.
 + *
 + * Note that the Intel OpenMP affinity disabling will only take effect if this
 + * function is called before the OpenMP library gets initialized, which happens
 + * when the first call is made into a compilation unit that contains OpenMP
 + * pragmas.
 + *
 + * If this function returns `false`, the caller is responsible for disabling
 + * the pinning, showing the message from \p *message to the user, and freeing
 + * the memory allocated for \p *message.
 + * If the return value is `true`, \p *message is NULL.
   */
 -void gmx_omp_check_thread_affinity(FILE *fplog, const t_commrec *cr,
 -                                   gmx_hw_opt_t *hw_opt);
 +gmx_bool gmx_omp_check_thread_affinity(char **message);
  
  /*! \brief
   * Pause for use in a spin-wait loop.
   */
  static gmx_inline void gmx_pause()
  {
- #ifndef GMX_NATIVE_WINDOWS
+ #ifndef _MSC_VER
      /* Ugly hack because the openmp implementation below hacks into the SIMD
       * settings to decide when to use _mm_pause(). This should eventually be
       * changed into proper detection of the intrinsics uses, not SIMD.
       */
- #if (defined GMX_SIMD_X86_SSE2) || (defined GMX_SIMD_X86_SSE4_1) || \
+ #if ((defined GMX_SIMD_X86_SSE2) || (defined GMX_SIMD_X86_SSE4_1) || \
      (defined GMX_SIMD_X86_AVX_128_FMA) || (defined GMX_SIMD_X86_AVX_256) || \
-     (defined GMX_SIMD_X86_AVX2_256)
+     (defined GMX_SIMD_X86_AVX2_256)) && !defined(__MINGW32__)
      /* Replace with tbb::internal::atomic_backoff when/if we use TBB */
      _mm_pause();
  #elif defined __MIC__
index 6b45a99a82be08246483cfa8eb7035a6d6cfbf4a,7d8e85c029b9cce1cdda8986077a3d4a0fe95ba8..e542c5532767e855591fca75a2f0f900a76453dc
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 +#include "gmxpre.h"
 +
  #include "gromacs/utility/smalloc.h"
  
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "config.h"
  
  #include <errno.h>
  #include <stdio.h>
  #ifdef WITH_DMALLOC
  #include <dmalloc.h>
  #endif
+ #ifdef HAVE__ALIGNED_MALLOC
+ #include <malloc.h>
+ #endif
  
 -#include "gromacs/legacyheaders/gmx_fatal.h"
 +#include "thread_mpi/threads.h"
  
 +#include "gromacs/utility/fatalerror.h"
  #ifdef PRINT_ALLOC_KB
 -#include "gromacs/legacyheaders/network.h"
 +#include "gromacs/utility/basenetwork.h"
  #include "gromacs/utility/gmxmpi.h"
  #endif
  
 -#ifdef DEBUG
 -#include "thread_mpi/threads.h"
 +static gmx_bool            g_bOverAllocDD     = FALSE;
 +static tMPI_Thread_mutex_t g_over_alloc_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
  
 +#ifdef DEBUG
  static void log_action(int bMal, const char *what, const char *file, int line,
                         int nelem, int size, void *ptr)
  {
@@@ -289,6 -290,7 +292,6 @@@ void *save_malloc_aligned(const char *n
          }
  #endif
  
 -        allocate_fail = FALSE; /* stop compiler warnings */
  #ifdef HAVE_POSIX_MEMALIGN
          allocate_fail = (0 != posix_memalign(&malloced, alignment, nelem*elsize));
  #elif defined HAVE_MEMALIGN
@@@ -337,7 -339,7 +340,7 @@@ void *save_calloc_aligned(const char *n
  }
  
  /* This routine can NOT be called with an arbitrary pointer */
- void save_free_aligned(const char *name, const char *file, int line, void *ptr)
+ void save_free_aligned(const char gmx_unused *name, const char gmx_unused *file, int gmx_unused line, void *ptr)
  {
      int   i, j;
      void *free = ptr;
  #endif
      }
  }
 +
 +void set_over_alloc_dd(gmx_bool set)
 +{
 +    tMPI_Thread_mutex_lock(&g_over_alloc_mutex);
 +    /* Stores the requested value in the file-scope flag g_bOverAllocDD,
 +       which over_alloc_dd() consults.
 +       The mutex only makes sure that we don't set the flag from two
 +       threads at the same time.
 +       We don't worry too much about reading this rarely-set variable:
 +       over_alloc_dd() reads it without taking the mutex. */
 +    g_bOverAllocDD = set;
 +    tMPI_Thread_mutex_unlock(&g_over_alloc_mutex);
 +}
 +
 +int over_alloc_dd(int n)
 +{   /* Returns the size to use for a request of n: a padded value when the
 +     * flag written by set_over_alloc_dd() is set, otherwise n unchanged.
 +     * The flag is read here without taking g_over_alloc_mutex.
 +     */
 +    if (g_bOverAllocDD)
 +    {
 +        return OVER_ALLOC_FAC*n + 100; /* OVER_ALLOC_FAC is defined outside this hunk; result is converted to int */
 +    }
 +    else
 +    {
 +        return n;
 +    }
 +}
index db626c4a3d4bdee72eaf0c092a52aaf3daba5558,77355e17161a080f8b145f7138cd5d448647af81..5ea00c67d79dad5e19ee9e266d4ef481f38b6a87
@@@ -39,8 -39,6 +39,8 @@@
   * \author Mark Abraham <mark.j.abraham@gmail.com>
   * \ingroup module_testutils
   */
 +#include "gmxpre.h"
 +
  #include "integrationtests.h"
  
  #include "testutils/testoptions.h"
@@@ -52,8 -50,6 +52,8 @@@
  #include <stdlib.h>
  #include <stdio.h>
  
 +#include "config.h"
 +
  namespace gmx
  {
  namespace test
@@@ -79,7 -75,7 +79,7 @@@ IntegrationTestFixture::IntegrationTest
      // TODO fix this when we have an encapsulation layer for handling
      // environment variables
  #ifdef GMX_NATIVE_WINDOWS
-     _putenv_s("GMX_MAXBACKUP", s_maxBackup.c_str());
+     _putenv(("GMX_MAXBACKUP="+s_maxBackup).c_str());
  #else
      setenv("GMX_MAXBACKUP", s_maxBackup.c_str(), true);
  #endif