Merge release-5-0 into master
authorRoland Schulz <roland@utk.edu>
Thu, 4 Sep 2014 16:48:52 +0000 (12:48 -0400)
committerRoland Schulz <roland@utk.edu>
Fri, 5 Sep 2014 08:11:45 +0000 (04:11 -0400)
Conflicts:
CMakeLists.txt
trivial
share/template/cmake/FindGROMACS.cmakein
deleted in master
src/gromacs/utility/gmx_header_config_gen.h.cmakein
applied to src/gromacs/utility/gmx_header_config.h
src/gromacs/commandline/pargs.cpp
applied to src/gromacs/commandline/cmdlinemodulemanager.cpp
src/gromacs/gmxlib/gmx_thread_affinity.c
trivial
src/gromacs/gmxlib/main.cpp
applied to src/gromacs/utility/basenetwork.cpp
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/*.c
Merged the template and reran the generator
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre
trivial
src/gromacs/utility/futil.cpp
change unnecessary (function deleted)
Other changes:
src/gromacs/fileio/vmdio.c
removed duplicate config.h

Change-Id: Ib4237944773e41d7b52e8c0ee181da717d2b26f3

108 files changed:
CMakeLists.txt
cmake/Platform/Toolchain-Fujitsu-Sparc64-mpi.cmake [moved from cmake/Toolchain-Fujitsu-Sparc64-mpi.cmake with 66% similarity]
cmake/Platform/Toolchain-Fujitsu-Sparc64.cmake [new file with mode: 0644]
cmake/TestFujitsuSparc64.c [new file with mode: 0644]
cmake/TestWinProcNum.c [new file with mode: 0644]
cmake/ThreadMPI.cmake
cmake/gmxCFlags.cmake
cmake/gmxDetectSimd.cmake
cmake/gmxDetectTargetArchitecture.cmake
cmake/gmxManageFujitsuSparc64.cmake [moved from cmake/Toolchain-Fujitsu-Sparc64.cmake with 68% similarity]
cmake/gmxManageOpenMP.cmake
cmake/gmxTestLibXml2.cmake
cmake/gmxTestSimd.cmake
docs/install-guide/install-guide.md
src/GetCompilerInfo.cmake
src/config.h.cmakein
src/external/gmock-1.7.0/CMakeLists.txt
src/external/thread_mpi/src/errhandler.c
src/external/thread_mpi/src/impl.h
src/external/thread_mpi/src/numa_malloc.c
src/external/thread_mpi/src/pthreads.c
src/external/thread_mpi/src/tmpi_init.c
src/external/thread_mpi/src/winthreads.c
src/external/tng_io/include/tng/tng_io.h
src/external/tng_io/src/lib/tng_io.c
src/external/vmd_molfile/vmddlopen.c
src/gromacs/commandline/cmdlinemodulemanager.cpp
src/gromacs/fileio/tngio.cpp
src/gromacs/fileio/vmdio.c
src/gromacs/gmxlib/checkpoint.cpp
src/gromacs/gmxlib/gmx_cpuid.c
src/gromacs/gmxlib/gmx_thread_affinity.c
src/gromacs/gmxlib/nonbonded/nb_free_energy.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py [changed mode: 0755->0644]
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3W3_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4W4_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecGB_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.c
src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre
src/gromacs/gmxpreprocess/readir.c
src/gromacs/gmxpreprocess/toputil.c
src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h [new file with mode: 0644]
src/gromacs/simd/simd.h
src/gromacs/simd/tests/simd.cpp
src/gromacs/simd/tests/simd4.cpp
src/gromacs/simd/tests/simd4_floatingpoint.cpp
src/gromacs/simd/tests/simd_floatingpoint.cpp
src/gromacs/utility/basenetwork.cpp
src/gromacs/utility/cstringutil.c
src/gromacs/utility/futil.cpp
src/gromacs/utility/gmx_header_config.h
src/gromacs/utility/gmxomp.h
src/gromacs/utility/smalloc.c
src/testutils/integrationtests.cpp

index 5aafe926ada829b58ebc622faf85d92c0458ffe4..66144b8960685bc42ba4e114a9d0ed5ba9ff0459 100644 (file)
@@ -83,6 +83,7 @@ if(CMAKE_CONFIGURATION_TYPES)
 endif()
 set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL PROFILE)
 
+set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON)
 set(CPACK_PACKAGE_NAME "gromacs")
 set(CPACK_PACKAGE_VERSION_MAJOR ${GMX_VERSION_MAJOR})
 set(CPACK_PACKAGE_VERSION_MINOR ${GMX_VERSION_MINOR})
@@ -138,6 +139,16 @@ if(CMAKE_HOST_UNIX)
             "Hostname of the machine where the cache was generated.")
 endif()
 
+########################################################################
+# Detect architecture before setting options so we can alter defaults
+########################################################################
+# Detect the architecture the compiler is targeting, detect the
+# SIMD instruction sets available on that hardware, suggest a SIMD instruction set
+# to use if none is specified, and populate the cache option for CPU
+# SIMD.
+include(gmxDetectTargetArchitecture)
+gmx_detect_target_architecture()
+
 ########################################################################
 # User input options                                                   #
 ########################################################################
@@ -145,7 +156,16 @@ include(gmxOptionUtilities)
 
 set(CMAKE_PREFIX_PATH "" CACHE STRING "Extra locations to search for external libraries and tools (give directory without lib, bin, or include)")
 
-option(GMX_DOUBLE "Use double precision (much slower, use only if you really need it)" OFF)
+if(GMX_TARGET_FUJITSU_SPARC64)
+    # Fujitsu only has SIMD in double precision, so this will be faster
+    set(GMX_DOUBLE_DEFAULT ON)
+else()
+    set(GMX_DOUBLE_DEFAULT OFF)
+endif()
+option(GMX_DOUBLE "Use double precision (much slower, use only if you really need it)" ${GMX_DOUBLE_DEFAULT})
+option(GMX_RELAXED_DOUBLE_PRECISION "Accept single precision 1/sqrt(x) when using Fujitsu HPC-ACE SIMD" OFF)
+mark_as_advanced(GMX_RELAXED_DOUBLE_PRECISION)
+
 option(GMX_MPI    "Build a parallel (message-passing) version of GROMACS" OFF)
 option(GMX_THREAD_MPI  "Build a thread-MPI-based multithreaded version of GROMACS (not compatible with MPI)" ON)
 gmx_dependent_option(
@@ -173,13 +193,6 @@ endif()
 set(REQUIRED_CUDA_COMPUTE_CAPABILITY 2.0)
 include(gmxManageGPU)
 
-# Detect the architecture the compiler is targetting, detect
-# SIMD instructions possibilities on that hardware, suggest SIMD instruction set
-# to use if none is specified, and populate the cache option for CPU
-# SIMD.
-include(gmxDetectTargetArchitecture)
-gmx_detect_target_architecture()
-
 if(GMX_CPU_ACCELERATION)
     # Stay compatible with old Jenkins command line options for specific SIMD acceleration
     set(GMX_SIMD "${GMX_CPU_ACCELERATION}" CACHE STRING "SIMD instruction set level and compiler optimization" FORCE)
@@ -315,21 +328,17 @@ endif()
 if(GMX_DOUBLE)
     add_definitions(-DGMX_DOUBLE)
     list(APPEND INSTALLED_HEADER_DEFINITIONS "-DGMX_DOUBLE")
+    if(GMX_RELAXED_DOUBLE_PRECISION)
+        add_definitions(-DGMX_RELAXED_DOUBLE_PRECISION)
+    endif()
 endif()
 if(GMX_SOFTWARE_INVSQRT)
     list(APPEND INSTALLED_HEADER_DEFINITIONS "-DGMX_SOFTWARE_INVSQRT")
 endif()
 
 if(WIN32 AND NOT CYGWIN)
-    set(GMX_WSOCKLIB_PATH CACHE PATH "Path to winsock (wsock32.lib) library.")
-    mark_as_advanced(GMX_WSOCKLIB_PATH)
-    find_library(WSOCK32_LIBRARY NAMES wsock32 PATHS ${GMX_WSOCKLIB_PATH})
-    if(WSOCK32_LIBRARY)
-        list(APPEND GMX_EXTRA_LIBRARIES ${WSOCK32_LIBRARY})
-        add_definitions(-DGMX_HAVE_WINSOCK)
-    else()
-        message(STATUS "No winsock found. Cannot use interactive molecular dynamics (IMD).")
-    endif(WSOCK32_LIBRARY)
+    list(APPEND GMX_EXTRA_LIBRARIES "wsock32")
+    add_definitions(-DGMX_HAVE_WINSOCK)
 endif()
 
 
@@ -388,6 +397,10 @@ if(${CMAKE_SYSTEM_NAME} MATCHES BlueGene)
     include(gmxManageBlueGene)
 endif()
 
+if(GMX_TARGET_FUJITSU_SPARC64)
+    include(gmxManageFujitsuSparc64)
+endif()
+
 ########################################################################
 #Process MPI settings
 ########################################################################
similarity index 66%
rename from cmake/Toolchain-Fujitsu-Sparc64-mpi.cmake
rename to cmake/Platform/Toolchain-Fujitsu-Sparc64-mpi.cmake
index 2b0a180b563b9b015876d2443527d7c6659aa44a..d457cc5d005e004307bd87bc50cf7787d10a71e4 100644 (file)
 # the research papers on the package. Check out http://www.gromacs.org.
 
 # the name of the target operating system
-set(CMAKE_SYSTEM_NAME Linux CACHE STRING "Cross-compiling for Fujitsu Sparc64")
+set(CMAKE_SYSTEM_NAME Linux CACHE STRING "Cross-compiling for Fujitsu Sparc64, with MPI")
+set(CMAKE_SYSTEM_PROCESSOR "s64fx")
 
 set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS FALSE)
 
 # set the compiler
 set(CMAKE_C_COMPILER mpifccpx)
 set(CMAKE_CXX_COMPILER mpiFCCpx)
-set(CMAKE_C_COMPILER_ID "Fujitsu" CACHE STRING "Prevent CMake from adding GNU-specific linker flags (-rdynamic)" FORCE)
 
-set(CMAKE_C_FLAGS "-Kopenmp -Kfast,reduction,swp,simd=2,uxsimd -x500 -Xg -DGMX_RELAXED_DOUBLE_PRECISION -w" CACHE STRING "Fujitsu Sparc64 C Flags" FORCE)
-set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "Fujitsu Sparc64 C++ Flags" FORCE)
-set(GMX_SOFTWARE_INVSQRT OFF CACHE BOOL "Use native 1.0/sqrt(x) on Fujitsu Sparc64" FORCE)
+# Prevent CMake from adding GNU-specific linker flags (-rdynamic)
+set(CMAKE_C_COMPILER_ID "Fujitsu" CACHE STRING "Fujitsu C cross-compiler" FORCE)
+set(CMAKE_CXX_COMPILER_ID "Fujitsu" CACHE STRING "Fujitsu C++ cross-compiler" FORCE)
 
-set(GMX_THREAD_MPI OFF CACHE BOOL "Use real MPI instead" FORCE)
-set(GMX_MPI ON CACHE BOOL "Use MPI library" FORCE)
-set(GMX_DOUBLE ON CACHE BOOL "Use double by default on Fujitsu Sparc64 (due to HPC-ACE)" FORCE)
-set(GMX_GPU OFF CACHE BOOL "Cannot do GPU acceleration on Fujitsu Sparc64" FORCE)
-set(BUILD_SHARED_LIBS OFF CACHE BOOL "Use static linking by default on Fujitsu Sparc64" FORCE)
-
-set(GMX_SIMD "Sparc64_HPC_ACE" CACHE STRING "Enabling Sparc64 HPC-ACE SIMD when using Fujitsu Sparc64 toolchain")
+# FindOpenMP.cmake does not try -Kopenmp, but the package will try specific
+# flags based on the compiler ID.
+set(OMP_FLAG_Fujitsu "-Kopenmp")
diff --git a/cmake/Platform/Toolchain-Fujitsu-Sparc64.cmake b/cmake/Platform/Toolchain-Fujitsu-Sparc64.cmake
new file mode 100644 (file)
index 0000000..be5f138
--- /dev/null
@@ -0,0 +1,53 @@
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+# the name of the target operating system
+set(CMAKE_SYSTEM_NAME Linux CACHE STRING "Cross-compiling for Fujitsu Sparc64")
+# Set the identification to the same value we would get on the nodes (uname -m)
+set(CMAKE_SYSTEM_PROCESSOR "s64fx")
+
+set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS FALSE)
+
+# set the compiler
+set(CMAKE_C_COMPILER fccpx)
+set(CMAKE_CXX_COMPILER FCCpx)
+
+# Prevent CMake from adding GNU-specific linker flags (-rdynamic)
+# A patch has been submitted to make CMake itself handle this in the future
+set(CMAKE_C_COMPILER_ID "Fujitsu" CACHE STRING "Fujitsu C cross-compiler" FORCE)
+set(CMAKE_CXX_COMPILER_ID "Fujitsu" CACHE STRING "Fujitsu C++ cross-compiler" FORCE)
+
+# FindOpenMP.cmake does not try -Kopenmp, but the package will try specific
+# flags based on the compiler ID.
+set(OMP_FLAG_Fujitsu "-Kopenmp")
diff --git a/cmake/TestFujitsuSparc64.c b/cmake/TestFujitsuSparc64.c
new file mode 100644 (file)
index 0000000..0740688
--- /dev/null
@@ -0,0 +1,8 @@
+int main()
+{
+#if defined (__FUJITSU) && ( defined(__sparc) || defined(__sparcv9) ) && ( defined(__LP64__) || defined(__arch64) )
+    return 0;
+#else
+#error This compiler is not targetting Fujitsu Sparc64
+#endif
+}
diff --git a/cmake/TestWinProcNum.c b/cmake/TestWinProcNum.c
new file mode 100644 (file)
index 0000000..669ff3b
--- /dev/null
@@ -0,0 +1,7 @@
+#define _WIN32_WINNT 0x0601 /*Require Windows7 (needed for MingW)*/
+#include <windows.h>
+int main()
+{
+    PROCESSOR_NUMBER p;
+    return 0;
+}
index d039dba5afd39972591a42db46a18341fcc4fa8f..76e4d37b052a50379452a570b257208026c8e2de 100644 (file)
@@ -62,15 +62,16 @@ MACRO(TMPI_TEST_ATOMICS INCDIR)
 
 ENDMACRO(TMPI_TEST_ATOMICS VARIABLE)
 
+try_compile(HAVE_PROCESSOR_NUMBER ${CMAKE_BINARY_DIR} "${CMAKE_SOURCE_DIR}/cmake/TestWinProcNum.c")
 
 include(FindThreads)
-if (CMAKE_USE_PTHREADS_INIT)
+if (CMAKE_USE_WIN32_THREADS_INIT AND HAVE_PROCESSOR_NUMBER)
+    set(THREAD_WINDOWS 1)
+    set(THREAD_LIB)
+elseif (CMAKE_USE_PTHREADS_INIT)
     check_include_files(pthread.h    HAVE_PTHREAD_H)
     set(THREAD_PTHREADS 1)
     set(THREAD_LIB ${CMAKE_THREAD_LIBS_INIT})
-elseif (CMAKE_USE_WIN32_THREADS_INIT)
-    set(THREAD_WINDOWS 1)
-    set(THREAD_LIB)
 else()
     message(FATAL_ERROR "Thread support required")
 endif ()
index 799bea1c7bc8f5438ea74a2f857dcd8b57ec61d2..c0eff548ba523da0122f679e4cfc7c064d125d58 100644 (file)
@@ -247,6 +247,19 @@ MACRO(gmx_c_flags)
         GMX_TEST_CXXFLAG(CXXFLAGS_WARN_EXTRA "-Wextra -Wno-missing-field-initializers -Wpointer-arith" GMXC_CXXFLAGS)
     endif()
 
+    # Fujitsu compilers on PrimeHPC/Sparc64
+    if(${CMAKE_C_COMPILER_ID} MATCHES Fujitsu OR
+       (${CMAKE_C_COMPILER_ID} MATCHES unknown AND ${CMAKE_C_COMPILER} MATCHES ^fcc))
+        GMX_TEST_CFLAG(CFLAG_GNUCOMPAT "-Xg -w" GMXC_CFLAGS)
+        GMX_TEST_CFLAG(CFLAG_OPT "-Kfast,reduction,swp,simd=2,uxsimd,fsimple -x100" GMXC_CFLAGS)
+    endif()
+
+    if(${CMAKE_CXX_COMPILER_ID} MATCHES Fujitsu OR
+       (${CMAKE_CXX_COMPILER_ID} MATCHES unknown AND ${CMAKE_CXX_COMPILER} MATCHES ^FCC))
+        GMX_TEST_CXXFLAG(CXXFLAG_GNUCOMPAT "-Xg -w" GMXC_CXXFLAGS)
+        GMX_TEST_CXXFLAG(CXXFLAG_OPT "-Kfast,reduction,swp,simd=2,uxsimd,fsimple -x100" GMXC_CXXFLAGS)
+    endif()
+
     # now actually set the flags:
     if (NOT GMX_SKIP_DEFAULT_CFLAGS)
         gmx_set_cmake_compiler_flags()
index a8ac28260544cf0ddaa490571bee82a348bce7d2..49fa77a1fcced9b6d45b15bc022d0999578222a2 100644 (file)
@@ -94,6 +94,10 @@ function(gmx_detect_simd _suggested_simd)
     if(NOT DEFINED GMX_SIMD)
         if(GMX_TARGET_BGQ)
             set(${_suggested_simd} "IBM_QPX")
+        elseif(GMX_TARGET_FUJITSU_SPARC64)
+            # HPC-ACE is always present. In the future we
+            # should add detection for HPC-ACE2 here.
+            set(${_suggested_simd} "Sparc64_HPC_ACE")
         elseif(GMX_TARGET_X86)
             gmx_suggest_x86_simd(${_suggested_simd})
         else()
index 2bd98720903e27ee21bab591d4c2dac81cd6edaa..2a188c77b30f443474151d6b9196ea556f8ca1b4 100644 (file)
@@ -53,4 +53,8 @@ function(gmx_detect_target_architecture)
         try_compile(GMX_TARGET_MIC ${CMAKE_BINARY_DIR}
             "${CMAKE_SOURCE_DIR}/cmake/TestMIC.c")
     endif()
+    if (NOT DEFINED GMX_TARGET_FUJITSU_SPARC64)
+        try_compile(GMX_TARGET_FUJITSU_SPARC64 ${CMAKE_BINARY_DIR}
+            "${CMAKE_SOURCE_DIR}/cmake/TestFujitsuSparc64.c")
+    endif()
 endfunction()
similarity index 68%
rename from cmake/Toolchain-Fujitsu-Sparc64.cmake
rename to cmake/gmxManageFujitsuSparc64.cmake
index 3f301eed64a30e015fc73cb9d0476ad4373b2066..e0b705ceee1aa686b97c5d5a4a0ac88d281bf7b6 100644 (file)
 # To help us fund GROMACS development, we humbly ask that you cite
 # the research papers on the package. Check out http://www.gromacs.org.
 
-# the name of the target operating system
-set(CMAKE_SYSTEM_NAME Linux CACHE STRING "Cross-compiling for Fujitsu Sparc64")
+# Managing configuration for Fujitsu PrimeHPC Sparc64
+# For now this is mainly used for K computer.
+message(STATUS "Configuring for Fujitsu Sparc64")
 
-set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS FALSE)
-
-# set the compiler
-set(CMAKE_C_COMPILER fccpx)
-set(CMAKE_CXX_COMPILER FCCpx)
-set(CMAKE_C_COMPILER_ID "Fujitsu" CACHE STRING "Prevent CMake from adding GNU-specific linker flags (-rdynamic)" FORCE)
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "Use static linking by default on Fujitsu Sparc64" FORCE)
+set(GMX_GPU OFF CACHE BOOL "Cannot do GPU acceleration on Fujitsu Sparc64" FORCE)
 
-set(CMAKE_C_FLAGS "-Kopenmp -Kfast,reduction,swp,simd=2,uxsimd -x500 -Xg -DGMX_RELAXED_DOUBLE_PRECISION -w" CACHE STRING "Fujitsu Sparc64 C Flags" FORCE)
-set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "Fujitsu Sparc64 C++ Flags" FORCE)
 set(GMX_SOFTWARE_INVSQRT OFF CACHE BOOL "Use native 1.0/sqrt(x) on Fujitsu Sparc64" FORCE)
+set(GMX_X11 OFF CACHE BOOL "X11 not compatible with Fujitsu Sparc64 cross-compile, disabled." FORCE)
 
-# By default CMake will use thread-mpi
-set(GMX_DOUBLE ON CACHE BOOL "Use double by default on Fujitsu Sparc64 (due to HPC-ACE)" FORCE)
-set(GMX_GPU OFF CACHE BOOL "Cannot do GPU acceleration on Fujitsu Sparc64" FORCE)
-set(BUILD_SHARED_LIBS OFF CACHE BOOL "Use static linking by default on Fujitsu Sparc64" FORCE)
-
-set(GMX_SIMD "Sparc64_HPC_ACE" CACHE STRING "Enabling Sparc64 HPC-ACE SIMD when using Fujitsu Sparc64 toolchain")
index 0fa2faeb77a7970278b265c099f7854b1de44cfe..cbad40bc671e1eb9a3867c4c7f5042ac28117248 100644 (file)
@@ -56,7 +56,7 @@ if(GMX_OPENMP)
         if(OPENMP_FOUND)
             # CMake on Windows doesn't support linker flags passed to target_link_libraries
             # (i.e. it treats /openmp as \openmp library file). Also, no OpenMP linker flags are needed.
-            if(NOT (WIN32 AND NOT CYGWIN))
+            if(NOT (WIN32 AND NOT CYGWIN AND NOT MINGW))
                 if(CMAKE_COMPILER_IS_GNUCC AND GMX_PREFER_STATIC_OPENMP AND NOT APPLE)
                     set(OpenMP_LINKER_FLAGS "-Wl,-static -lgomp -lrt -Wl,-Bdynamic -lpthread")
                     set(OpenMP_SHARED_LINKER_FLAGS "")
@@ -70,6 +70,10 @@ if(GMX_OPENMP)
                     endif()
                 endif()
             endif()
+            if(MINGW)
+                #GCC Bug 48659
+                set(OpenMP_C_FLAGS "${OpenMP_C_FLAGS} -mstackrealign")
+            endif()
         else()
             message(WARNING
                     "The compiler you are using does not support OpenMP parallelism. This might hurt your performance a lot, in particular with GPUs. Try using a more recent version, or a different compiler. For now, we are proceeding by turning off OpenMP.")
index 4546bfcd7cd03524d01d286ec2d242f642abdda1..48e1c21d661875fc1190684154ac2482d04287a4 100644 (file)
@@ -42,6 +42,7 @@
 #  VARIABLE will be set to true if libxml2 support is present
 
 include(CheckLibraryExists)
+include(CheckIncludeFiles)
 include(gmxOptionUtilities)
 function(GMX_TEST_LIBXML2 VARIABLE)
     if(LIBXML2_FOUND)
@@ -50,7 +51,24 @@ function(GMX_TEST_LIBXML2 VARIABLE)
             unset(LIBXML2_LINKS_OK CACHE)
         endif()
         check_library_exists("${LIBXML2_LIBRARIES}" "xmlTextWriterEndAttribute" "" LIBXML2_LINKS_OK)
-        set(${VARIABLE} ${LIBXML2_LINKS_OK} PARENT_SCOPE)
+        if(LIBXML2_LINKS_OK)
+            #check that xml headers can be included
+            set(CMAKE_REQUIRED_INCLUDES "${LIBXML2_INCLUDE_DIR}")
+            check_include_files("libxml/parser.h" LIBXML2_INCL_OK)
+            if(NOT LIBXML2_INCL_OK)
+                #xml headers depend on iconv.h. Test whether adding its path fixes the problem
+                find_path(ICONV_INCLUDE_DIR iconv.h)
+                if(ICONV_INCLUDE_DIR)
+                    set(CMAKE_REQUIRED_INCLUDES "${LIBXML2_INCLUDE_DIR};${ICONV_INCLUDE_DIR}")
+                    unset(LIBXML2_INCL_OK CACHE)
+                    check_include_files("libxml/parser.h" LIBXML2_INCL_OK)
+                    set(LIBXML2_INCLUDE_DIR "${LIBXML2_INCLUDE_DIR};${ICONV_INCLUDE_DIR}" CACHE PATH "Libxml2 include path" FORCE)
+                endif()
+            endif()
+            set(${VARIABLE} ${LIBXML2_INCL_OK} PARENT_SCOPE)
+        else()
+            set(${VARIABLE} OFF PARENT_SCOPE)
+        endif()
     else()
         set(${VARIABLE} OFF PARENT_SCOPE)
     endif()
index 49ea326b12b88a1a24e4ab648f75712aac0b8e20..f58429a23916aa1436d67214d77ed2b96b99b480 100644 (file)
@@ -265,6 +265,8 @@ elseif(${GMX_SIMD} STREQUAL "IBM_QPX")
 
 elseif(${GMX_SIMD} STREQUAL "SPARC64_HPC_ACE")
 
+    # Note that GMX_RELAXED_DOUBLE_PRECISION is an option (OFF by default) defined in the top-level CMakeLists.txt
+
     set(GMX_SIMD_SPARC64_HPC_ACE 1)
     set(SIMD_STATUS_MESSAGE "Enabling Sparc64 HPC-ACE SIMD instructions")
 
index d34a3b5d67c9c3280651c36cf34454b7a942ed53..3ce470db15e828f68a464632aed8621b09552f20 100644 (file)
@@ -931,7 +931,7 @@ The recommended configuration is to use
 
     cmake .. -DCMAKE_C_COMPILER=mpicc \
              -DCMAKE_CXX_COMPILER=mpicxx \
-             -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-CXX \
+             -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-CXX.cmake \
              -DCMAKE_PREFIX_PATH=/your/fftw/installation/prefix \
              -DGMX_MPI=ON \
              -DGMX_BUILD_MDRUN_ONLY=ON
@@ -964,8 +964,25 @@ add it. The default plain C kernels will work.
 
 This is the architecture of the K computer, which uses Fujitsu
 `Sparc64VIIIfx` chips. On this platform, GROMACS @PROJECT_VERSION@ has
-accelerated group kernels, no accelerated Verlet kernels, and a custom
-build toolchain.
+accelerated group kernels using the HPC-ACE instructions, no
+accelerated Verlet kernels, and a custom build toolchain. Since this
+particular chip only does double precision SIMD, the default setup
+is to build GROMACS in double precision. Since most users only need single precision, we have added
+an option GMX_RELAXED_DOUBLE_PRECISION to accept single precision square root
+accuracy in the group kernels; unless you know that you really need 15 digits
+of accuracy in each individual force, we strongly recommend you use this. Note
+that all summation and other operations are still done in double.
+
+The recommended configuration is to use
+
+    cmake .. -DCMAKE_TOOLCHAIN_FILE=Toolchain-Fujitsu-Sparc64-mpi.cmake \
+             -DCMAKE_PREFIX_PATH=/your/fftw/installation/prefix \
+             -DCMAKE_INSTALL_PREFIX=/where/gromacs/should/be/installed \
+             -DGMX_MPI=ON \
+             -DGMX_BUILD_MDRUN_ONLY=ON \
+             -DGMX_RELAXED_DOUBLE_PRECISION=ON
+    make
+    make install
 
 ### Intel Xeon Phi ###
 
index 5c3d5319560ce9dfc45426718e690e145e32890f..a827daebed40fe328f9b9ae61946ff3262da2743 100644 (file)
@@ -1,7 +1,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2012,2013, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
@@ -43,6 +43,6 @@
 #
 macro(get_compiler_info LANGUAGE BUILD_COMPILER BUILD_FLAGS)
     set(${BUILD_COMPILER} "${CMAKE_${LANGUAGE}_COMPILER} ${CMAKE_${LANGUAGE}_COMPILER_ID} ${CMAKE_${LANGUAGE}_COMPILER_VERSION}")
-    string(TOUPPER ${CMAKE_BUILD_TYPE} _build_type)
+    string(TOUPPER "${CMAKE_BUILD_TYPE}" _build_type)
     set(${BUILD_FLAGS} "${CMAKE_${LANGUAGE}_FLAGS} ${CMAKE_${LANGUAGE}_FLAGS_${_build_type}}")
 endmacro()
index 32e4cf00417eed7dd4d539d55b3e1013cfd9d812..4fdf9d7369a3755e0fa73ef3a751dd3a2590dc6b 100644 (file)
@@ -41,6 +41,8 @@
  *
  * \inlibraryapi
  */
+#ifndef GMX_CONFIG_H
+#define GMX_CONFIG_H
 #include "gromacs/utility/gmx_header_config.h"
 
 /* TODO: For now, disable Doxygen warnings from here */
 /* Define if we have zlib */
 #cmakedefine HAVE_ZLIB
 
+#endif
+
 /*! \endcond */
index 47a30b0358ed2652c7d49621f0596238f83e6301..d0552fb3f968f8d0fcb39e45e6ac343d7d2eaf5a 100644 (file)
@@ -60,6 +60,13 @@ include_directories(BEFORE ${GTEST_INCLUDE_DIRS})
 include_directories(BEFORE ${GTEST_DIR})
 include_directories(BEFORE ${GMOCK_INCLUDE_DIRS})
 include_directories(BEFORE ${GMOCK_DIR})
+
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag(-Wno-unused-variable HAS_NO_UNUSED_VARIABLE)
+if (HAS_NO_UNUSED_VARIABLE)
+    set_source_files_properties(${GTEST_SOURCES} PROPERTIES COMPILE_FLAGS "-Wno-unused-variable")
+endif()
+
 add_library(gmock STATIC ${UNITTEST_TARGET_OPTIONS} ${GMOCK_SOURCES} ${GTEST_SOURCES})
 set_property(TARGET gmock APPEND PROPERTY COMPILE_DEFINITIONS "${GMOCK_COMPILE_DEFINITIONS}")
 
index 60f77420fb0d6080a123ddd9432b91be08d7752d..23cab2b630d6f95b18716a36d82ccf796cf9ac6b 100644 (file)
@@ -132,7 +132,7 @@ int tMPI_Error_string(int errorcode, char *strn, size_t *resultlen)
 
     if (errorcode != TMPI_ERR_IO)
     {
-#if !(defined( _WIN32 ) || defined( _WIN64 ) )
+#ifndef _MSC_VER
         strncpy(strn, tmpi_errmsg[errorcode], TMPI_MAX_ERROR_STRING);
 #else
         strncpy_s(strn, TMPI_MAX_ERROR_STRING, tmpi_errmsg[errorcode],
@@ -141,7 +141,7 @@ int tMPI_Error_string(int errorcode, char *strn, size_t *resultlen)
     }
     else
     {
-#if !(defined( _WIN32 ) || defined( _WIN64 ) )
+#ifndef _MSC_VER
         snprintf(strn, TMPI_MAX_ERROR_STRING,
                  "%s: %s", tmpi_errmsg[errorcode], strerror(errno));
 #else
index 962e80c923e1f7659e5856c18cac6659b56aabe3..7113a44555cdfc4fd8c13e87677cab00cf022ad2 100644 (file)
 #include <unistd.h>
 #endif
 
+#if defined( _WIN32 ) || defined( _WIN64 )
+#include <windows.h>
+#endif
+
 #ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>
 #endif
index 8bdcbe7b52656a1c363bb9d9d0f59f880bdde06b..c0c455d20a82d75f9ddf266092f21df00008a345 100644 (file)
@@ -9,6 +9,11 @@
 #include "config.h"
 #endif
 
+#ifdef THREAD_WINDOWS
+    #ifdef __MINGW32__
+       #define _WIN32_WINNT 0x0601 /* Windows 7*/
+    #endif
+#endif
 
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
@@ -22,9 +27,7 @@
 #include <dmalloc.h>
 #endif
 
-
-#if !(defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) || defined (__CYGWIN__) || defined (__CYGWIN32__)
-
+#ifndef THREAD_WINDOWS
 
 /* We don't have specific NUMA aware allocators: */
 
@@ -81,11 +84,8 @@ int tMPI_Free_numa(void *ptr)
     Scott Field (sfield@microsoft.com)      Jan-2011
  */
 
-//#define _WIN32_WINNT 0x0601
 #include <windows.h>
 
-
-
 /*
     __declspec(align()) may not be supported by all compilers, so define the
     size of the structure manually to force alignment
@@ -218,11 +218,19 @@ InitNumaHeapSupport(
             return;
         }
 
+#if defined(WIN64) || defined( _WIN64 )
+        hPriorValue = (HANDLE *)InterlockedCompareExchange64(
+                    (LONGLONG volatile *)&g_hHeap,
+                    (LONGLONG) hHeapNew,
+                    0
+                    );
+#else
         hPriorValue = (HANDLE *)InterlockedCompareExchange(
                     (LONG volatile *)&g_hHeap,
                     (LONG) hHeapNew,
                     0
                     );
+#endif
 
         if (hPriorValue != NULL)
         {
index 8da1df67514b046c5195364fb602500fa4019dde..3aee9c52e55c20a7520784c136a51c23ccb128c8 100644 (file)
@@ -196,6 +196,9 @@ struct tMPI_Thread_starter
 };
 
 /* the thread_starter function that sets the thread id */
+#ifdef __MINGW32__
+__attribute__((force_align_arg_pointer))
+#endif
 static void *tMPI_Thread_starter(void *arg)
 {
     struct tMPI_Thread_starter *starter = (struct tMPI_Thread_starter *)arg;
index 4d15b931de4c32bc5aa86bc670e51c67fc2d8846..0728618d30b0f1e19cfdcdf3aa0949ef7f64051e 100644 (file)
@@ -810,7 +810,7 @@ int tMPI_Get_processor_name(char *name, int *resultlen)
             digits = 1;
         }
     }
-#if !(defined( _WIN32 ) || defined( _WIN64 ) )
+#ifndef _MSC_VER
     strcpy(name, "thread #");
 #else
     strncpy_s(name, TMPI_MAX_PROCESSOR_NAME, "thread #", TMPI_MAX_PROCESSOR_NAME);
index f78dd73553c3a340f69490c0a00f4a012234d527..98a06ace802ffdc4f7a18cc09cd69394b117fc29 100644 (file)
 #ifdef THREAD_WINDOWS
 
 /* the win32 header */
+#ifdef __MINGW32__
+/* A couple of types (e.g. PROCESSOR_NUMBER) are only available since
+ * WinServer2008 (0x600) and Windows7 (0x601). The MinGW headers do
+ * not define them for 0x600. */
+#define _WIN32_WINNT 0x0601
+#endif
 #include <windows.h>
 
 
@@ -66,6 +72,7 @@
 #include "thread_mpi/atomic.h"
 #include "thread_mpi/threads.h"
 #include "impl.h"
+#include "unused.h"
 
 #include "winthreads.h"
 
@@ -686,6 +693,9 @@ struct tMPI_Thread_starter_param
     struct tMPI_Thread *thread;
 };
 
+#ifdef __GNUC__
+__attribute__((force_align_arg_pointer))
+#endif
 static DWORD WINAPI tMPI_Win32_thread_starter( LPVOID lpParam )
 {
     struct tMPI_Thread_starter_param *prm =
@@ -823,7 +833,7 @@ int tMPI_Thread_join(tMPI_Thread_t thread, void **value_ptr)
 }
 
 
-void tMPI_Thread_exit(void *value_ptr)
+void tMPI_Thread_exit(void tmpi_unused *value_ptr)
 {
     /* TODO: call destructors for thread-local storage */
     ExitThread( 0 );
@@ -1050,7 +1060,7 @@ int tMPI_Thread_mutex_unlock(tMPI_Thread_mutex_t *mtx)
 
 
 
-int tMPI_Thread_key_create(tMPI_Thread_key_t *key, void (*destructor)(void *))
+int tMPI_Thread_key_create(tMPI_Thread_key_t *key, void (*destructor)(void *) tmpi_unused)
 {
     if (key == NULL)
     {
index 42119a5328863cd1dbfb538ef71da68fc2b49237..51c1d9b5935f89fa64dc6c2f78c020b21b2157cd 100644 (file)
@@ -336,13 +336,18 @@ typedef unsigned __int64 uint64_t;
 
 #endif /* USE_STD_INTTYPES_H */
 
-
 #ifndef USE_WINDOWS
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
 #define USE_WINDOWS
 #endif /* win32... */
 #endif /* not defined USE_WINDOWS */
 
+#ifdef USE_WINDOWS
+#define TNG_PRIsize "Iu"
+#else
+#define TNG_PRIsize "zu"
+#endif
+
 #ifndef DECLSPECDLLEXPORT
 #ifdef USE_WINDOWS
 #define DECLSPECDLLEXPORT __declspec(dllexport)
index c388f789bfcbae367f3e583bde0c91098c41631f..074b781317ebdc50c78b2455d25d9462affbe095 100644 (file)
 #include "compression/tng_compress.h"
 #include "tng/version.h"
 
-#ifdef _MSC_VER
-#define fseeko _fseeki64
-#define ftello _ftelli64
+#if defined( _WIN32 ) || defined( _WIN64 )
+    #ifndef fseeko
+        #define fseeko _fseeki64
+    #endif
+    #ifndef ftello
+        #ifdef __MINGW32__
+            #define ftello ftello64
+        #else
+            #define ftello _ftelli64
+        #endif
+    #endif
 #endif
 
 struct tng_bond {
@@ -743,7 +751,7 @@ static tng_function_status tng_block_init(struct tng_gen_block **block_p)
     *block_p = malloc(sizeof(struct tng_gen_block));
     if(!*block_p)
     {
-        fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+        fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
                sizeof(struct tng_gen_block), __FILE__, __LINE__);
         return(TNG_CRITICAL);
     }
@@ -4731,7 +4739,7 @@ static tng_function_status tng_particle_data_block_create
                     frame_set->n_particle_data_blocks);
         if(!data)
         {
-            fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+            fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
                 sizeof(struct tng_particle_data) *
                 frame_set->n_particle_data_blocks,
                 __FILE__, __LINE__);
@@ -4749,7 +4757,7 @@ static tng_function_status tng_particle_data_block_create
                         tng_data->n_particle_data_blocks);
         if(!data)
         {
-            fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+            fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
                     sizeof(struct tng_particle_data) *
                     tng_data->n_particle_data_blocks,
                     __FILE__, __LINE__);
@@ -5995,7 +6003,7 @@ static tng_function_status tng_particle_data_block_write
         temp_name = realloc(block->name, len);
         if(!temp_name)
         {
-            fprintf(stderr, "TNG library: Cannot allocate memory (%lud bytes). %s: %d\n", len,
+            fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n", len,
                    __FILE__, __LINE__);
             free(block->name);
             block->name = 0;
@@ -6449,7 +6457,7 @@ static tng_function_status tng_data_block_create
                        frame_set->n_data_blocks);
         if(!data)
         {
-            fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+            fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
                 sizeof(struct tng_non_particle_data) * frame_set->n_data_blocks,
                 __FILE__, __LINE__);
             free(frame_set->tr_data);
@@ -6465,7 +6473,7 @@ static tng_function_status tng_data_block_create
                         tng_data->n_data_blocks);
         if(!data)
         {
-            fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+            fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
                 sizeof(struct tng_non_particle_data) * tng_data->n_data_blocks,
                 __FILE__, __LINE__);
             free(tng_data->non_tr_data);
@@ -9103,7 +9111,7 @@ tng_function_status DECLSPECDLLEXPORT tng_molecule_alloc(const tng_trajectory_t
     *molecule_p = malloc(sizeof(struct tng_molecule));
     if(!*molecule_p)
     {
-        fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+        fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
                sizeof(struct tng_molecule), __FILE__, __LINE__);
         return(TNG_CRITICAL);
     }
@@ -9790,7 +9798,7 @@ tng_function_status DECLSPECDLLEXPORT tng_trajectory_init(tng_trajectory_t *tng_
     *tng_data_p = malloc(sizeof(struct tng_trajectory));
     if(!*tng_data_p)
     {
-        fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+        fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
                sizeof(struct tng_trajectory), __FILE__, __LINE__);
         return(TNG_CRITICAL);
     }
@@ -10311,7 +10319,7 @@ tng_function_status DECLSPECDLLEXPORT tng_trajectory_init_from_src(tng_trajector
     *dest_p = malloc(sizeof(struct tng_trajectory));
     if(!*dest_p)
     {
-        fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+        fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
                sizeof(struct tng_trajectory), __FILE__, __LINE__);
         return(TNG_CRITICAL);
     }
index a060cf6e18859a0bce2f0eb214353866f6cd55d8..ed9f35e6a76b6da2b3777edf31e74443ac1a88e7 100644 (file)
@@ -153,7 +153,7 @@ const char *vmddlerror( void  ) {
   return errorString;
 }
 
-#elif defined(_MSC_VER)
+#elif defined( _WIN32 ) || defined( _WIN64 )
 
 #include <windows.h>
 
@@ -165,7 +165,7 @@ const char *vmddlerror(void) {
   static CHAR szBuf[80]; 
   DWORD dw = GetLastError(); 
  
-  sprintf(szBuf, "vmddlopen failed: GetLastError returned %u\n", dw); 
+  sprintf(szBuf, "vmddlopen failed: GetLastError returned %lu\n", dw);
   return szBuf;
 }
 
index ac5f422d332e10ec9314c46f24854cefd39ab64f..b026f8b0db6382f5ac1fec59895ca35c8d7c8188 100644 (file)
@@ -578,7 +578,7 @@ int CommandLineModuleManager::run(int argc, char *argv[])
         fprintf(stderr, "Will write debug log file: %s\n", filename.c_str());
         gmx_init_debug(optionsHolder.debugLevel(), filename.c_str());
     }
-#if defined(HAVE_UNISTD_H) && !defined(GMX_NO_NICE)
+#if defined(HAVE_UNISTD_H) && !defined(GMX_NO_NICE) && !defined(__MINGW32__)
     // Set the nice level unless disabled in the configuration.
     if (optionsHolder.niceLevel() != 0)
     {
index 20932d54c37627bcd7e91c4f145d2701f3058619..b677014564098d9ab2c65d5d4cf6a220ea1ec600 100644 (file)
@@ -150,7 +150,7 @@ void gmx_tng_open(const char       *filename,
 //             tng_last_program_name_set(*tng, programInfo);
 //         }
 
-#ifdef HAVE_UNISTD_H
+#if defined(HAVE_UNISTD_H) && !defined(__MINGW32__)
         char username[256];
         if (!getlogin_r(username, 256))
         {
index 670aef0d7ba73440e59a0f4fa4c3f69b212dcbad..b4ed95239e8e7eb757ca71064385ad310a290a16 100644 (file)
@@ -99,6 +99,9 @@
 #ifndef GMX_NATIVE_WINDOWS
 #include <glob.h>
 #else
+#ifndef _WIN32_IE
+#define _WIN32_IE 0x0500 /* SHGetFolderPath is available since WinXP/IE5 */
+#endif
 #include <windows.h>
 #include <shlobj.h>
 #endif
index 9162172255cd75e9164027d12f1717640173565d..b2c9c37e9704f25be89963b3a9893a7e9605d61a 100644 (file)
@@ -177,7 +177,11 @@ gmx_wintruncate(const char *filename, __int64 size)
         return -1;
     }
 
+#ifdef _MSC_VER
     return _chsize_s( fileno(fp), size);
+#else
+    return _chsize( fileno(fp), size);
+#endif
 #endif
 }
 #endif
index bdbda35b857433adc6fc995dc075206ba286ce12..c786c0e5bb1324a72de26c3f1ed3ca71f5545a17 100644 (file)
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
-#ifdef _MSC_VER
+#ifdef GMX_NATIVE_WINDOWS
 /* MSVC definition for __cpuid() */
-#include <intrin.h>
+    #ifdef _MSC_VER
+        #include <intrin.h>
+    #endif
 /* sysinfo functions */
-#include <windows.h>
+    #include <windows.h>
 #endif
 #ifdef HAVE_UNISTD_H
 /* sysconf() definition */
-#include <unistd.h>
+    #include <unistd.h>
 #endif
 
 #include "gromacs/legacyheaders/gmx_cpuid.h"
index 385856f71ddef3c5bdc01760f4c0f62b76b1fc0f..703ec4cf713c4196ed8053800fc497f707ddda66 100644 (file)
@@ -35,7 +35,7 @@
 #include "gmxpre.h"
 
 #include "config.h"
-#if defined(HAVE_SCHED_H)
+#ifdef HAVE_SCHED_AFFINITY
 #  ifndef _GNU_SOURCE
 #    define _GNU_SOURCE 1
 #  endif
index eb63d9a8650600fd25c9d9ae18a141f8f58d026d..fc6ca9bc67dad6cccce09f9ac0bbd7da00f6cb7c 100644 (file)
@@ -65,8 +65,9 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
 #define  NSTATES  2
     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
     real          shX, shY, shZ;
-    real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
-    real          Vcoul[NSTATES], Vvdw[NSTATES];
+    real          tx, ty, tz, Fscal;
+    double        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
+    double        Vcoul[NSTATES], Vvdw[NSTATES];     /* Needs double for sc_power==48 */
     real          rinv6, r, rt, rtC, rtV;
     real          iqA, iqB;
     real          qq[NSTATES], vctot, krsq;
@@ -79,7 +80,7 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
     double        dvdl_coul, dvdl_vdw;
     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
-    real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
+    double        rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
     int           do_tab, tab_elemsize;
     int           n0, n1C, n1V, nnn;
index 6f01d2dfef650b9e613aa8d19f21250fc6c6c978..923f4bb9fda9ced070ee7c852a9105b6eff67af9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #ifndef _kernelutil_sparc64_hpc_ace_double_h_
 #define _kernelutil_sparc64_hpc_ace_double_h_
 
+/* Get gmx_simd_exp_d() */
+#include "gromacs/simd/simd.h"
+#include "gromacs/simd/simd_math.h"
+
 /* Fujitsu header borrows the name from SSE2, since some instructions have aliases */
 #include <emmintrin.h>
 
index 2e87bd52bdef2c21781d998eada64b5986c8f349..3c8b96e7ba50134bef697e14f05f2e65e0334819 100644 (file)
@@ -297,7 +297,8 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -629,7 +630,8 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index fbd62bab82ff2d2003417b8e6b9d69d7d1dc8577..310216adb6a5c8f272caf84cb46287c1606f64d4 100644 (file)
@@ -437,7 +437,8 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -1001,7 +1002,8 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 5135e7bad0b90f401277ddaa347e5b90cd65e687..f1f9b8f2af375906756f25fc720986a2ca7ab47e 100644 (file)
@@ -483,7 +483,8 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -1125,7 +1126,8 @@ nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index e5c35edec6eca0ece4f07f1593246e3b72bdec80..fe3259d2276fbea632ec9b798535e9b0fd84c048 100644 (file)
@@ -279,7 +279,8 @@ nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -573,7 +574,8 @@ nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 8c6b0ab5b80f62772570905c0b11705c1cb2b6d5..31d53447dce169070134feb7d8a2388ee04e643b 100644 (file)
@@ -419,7 +419,8 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -945,7 +946,8 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 9ae48ae1059782d5fb15627a444ad84685e05b4a..cb81142c880ddb2a1bded8e700cf3c2fba45cbaf 100644 (file)
@@ -449,7 +449,8 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
              **************************/
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -1027,7 +1028,8 @@ nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
              **************************/
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index 86f65975557357f4639504f28fea4234233b4283..111f202a7e679fced29cb87140cc29c879651fc6 100644 (file)
@@ -290,7 +290,8 @@ nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -606,7 +607,8 @@ nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 473ac3463ea7012c1bf945e4205474e50560e58c..e69261ef63760f33d00fd5970bff84c07786407c 100644 (file)
@@ -392,7 +392,8 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -864,7 +865,8 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 32c465bdeec836f8d691ccf3616194518c32b8da..48d2976c68d9a421275291b912668aadde594ab9 100644 (file)
@@ -426,7 +426,8 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -952,7 +953,8 @@ nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index c323d5340503026d27dcaf9679d66fd51f57eb70..42d8daba0324f870edf4f9b61ea5b64ace2c9d18 100644 (file)
@@ -251,7 +251,8 @@ nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* COULOMB ELECTROSTATICS */
             velec            = _fjsp_mul_v2r8(qq00,rinv00);
@@ -500,7 +501,8 @@ nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* COULOMB ELECTROSTATICS */
             velec            = _fjsp_mul_v2r8(qq00,rinv00);
index af0cf054c2ea068503adc9df163b3de82fd02fca..fa424c4e4a1314e980957c8f254d08f9e4a2e3f9 100644 (file)
@@ -353,7 +353,8 @@ nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* COULOMB ELECTROSTATICS */
             velec            = _fjsp_mul_v2r8(qq00,rinv00);
@@ -758,7 +759,8 @@ nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* COULOMB ELECTROSTATICS */
             velec            = _fjsp_mul_v2r8(qq00,rinv00);
index 95667b3a43ae31370a4efbb7f854f93f0763ba06..2beed1988a424283684abaac9e7e945353bc59bc 100644 (file)
@@ -387,7 +387,8 @@ nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
              **************************/
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -846,7 +847,8 @@ nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
              **************************/
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index 881a610e6884897dbbc2077e657af5d3cf80412c..652086ce891c293605dab91d23b796d4c78ea1f4 100644 (file)
@@ -246,11 +246,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -318,9 +318,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* EWALD ELECTROSTATICS */
 
@@ -345,11 +347,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -595,11 +597,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -660,9 +662,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* EWALD ELECTROSTATICS */
 
@@ -680,11 +684,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index c2b63eb9fa0a5e6ecda9f4f1e46ba046c699aefd..046fb5655cf438140c82da1754e47ce1dfb34bdd 100644 (file)
@@ -280,11 +280,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -476,9 +476,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* EWALD ELECTROSTATICS */
 
@@ -503,11 +505,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -901,11 +903,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -1070,9 +1072,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* EWALD ELECTROSTATICS */
 
@@ -1090,11 +1094,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index c270f75362d8e41f56c62ba8aff546e320a9343c..e3760142fd89d01b2c7ac6451964970cae937257 100644 (file)
@@ -342,11 +342,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -885,11 +885,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -1651,11 +1651,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -2100,11 +2100,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 90c02266aafda361917580a4311d73985e453cb1..d8e80bef4334bbcaeea57172bfcae301e0fd7d77 100644 (file)
@@ -273,11 +273,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -524,19 +524,21 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* Analytical LJ-PME */
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -981,11 +983,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -1197,19 +1199,21 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* Analytical LJ-PME */
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 7f3df8cfb4eab4da5119256506c0508b58cfbcc3..5dabbd9a3045d1c47be11df312e44b24013278a8 100644 (file)
@@ -342,11 +342,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -923,11 +923,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -1744,11 +1744,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -2230,11 +2230,11 @@ nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 444fd809fb9df4f62ca659569329e558920f2a98..35be03c0ec1ff9c450f90f4e7d607fa771696d4d 100644 (file)
@@ -300,7 +300,8 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
@@ -613,7 +614,8 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
index 97519cd9badac14e4ed01c7b2a01887ada6b7d41..5590857dc8bd18e3d1d67f8a84fb62661099f5a6 100644 (file)
@@ -456,7 +456,8 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
@@ -1019,7 +1020,8 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
index 2e14a949d650a8e70dae73bf67b8630d0b997709..662c00981490f417001c68aec8b1da66bdf1b5ab 100644 (file)
@@ -497,7 +497,8 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double
             {
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -1132,7 +1133,8 @@ nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double
             {
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index e794bdbecf456baca2a06102344bcb381d746c03..02c75bbaf563824ff3922da8ed8fd195171216c9 100644 (file)
@@ -323,7 +323,8 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
@@ -679,7 +680,8 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
index 51771ffc566f57ed40977ad96125a456b4bdb61d..86534b4b33c3c9e306c3d9dec11a6be846073fba 100644 (file)
@@ -501,7 +501,8 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
@@ -1161,7 +1162,8 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
index 1ce2359ed68cdc2725d4455361073cbf88d8db03..e0e9c060889f2de130c7e7a8c941a0a939029e3a 100644 (file)
@@ -557,7 +557,8 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -1313,7 +1314,8 @@ nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index 976003f3e77c8c7acda7488d0eab1a48aed4d9d2..dfdcbe424bb81b62079148db5715cecc237f0034 100644 (file)
@@ -312,7 +312,8 @@ nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -659,7 +660,8 @@ nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 1b40bdd173f23e38fb6b4f3544454e5f30c4a90c..a5df390d684d5c47d370d3bfdc6b2b7502a603b0 100644 (file)
@@ -448,7 +448,8 @@ nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -1007,7 +1008,8 @@ nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 37ce31c72ac19d3b9821871450e842e766d4d431..0cb4333284755557664e4937fa0ee44b66a1879f 100644 (file)
@@ -484,7 +484,8 @@ nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -1101,7 +1102,8 @@ nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index ebd45fa7e79351ef67bbe0dbeec9005acb987add..a4e5a843659e0ec40c4c2c2003d74af6f14eb3e8 100644 (file)
@@ -235,11 +235,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -295,9 +295,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* EWALD ELECTROSTATICS */
 
@@ -322,11 +324,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -552,11 +554,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -608,9 +610,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* EWALD ELECTROSTATICS */
 
@@ -628,11 +632,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 8208eccb87f9f728978cbfcd493ac0d39509f832..02c004d7975a15bc1d657dd745005449a0f377e3 100644 (file)
@@ -269,11 +269,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -433,9 +433,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* EWALD ELECTROSTATICS */
 
@@ -460,11 +462,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -818,11 +820,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -960,9 +962,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* EWALD ELECTROSTATICS */
 
@@ -980,11 +984,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index c51a737d841dfbfbec9abb8b1f70fef4e276e779..34304fc32dfeda9dc9b1f1d117d3b844078e10a8 100644 (file)
@@ -331,11 +331,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -782,11 +782,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -1448,11 +1448,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -1816,11 +1816,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 7eae21170b373d7a86fa65099418a3623f75193a..d15a704666dbc0ca481693d09cf40c50ed83c7e4 100644 (file)
@@ -262,11 +262,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -472,19 +472,21 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* Analytical LJ-PME */
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -880,11 +882,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -1060,19 +1062,21 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* Analytical LJ-PME */
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index c4e4135fa95e75c1afb8f762f30003a8add2c83e..379f2e65043a0d58216fd1d63203c2cf19364064 100644 (file)
@@ -331,11 +331,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -811,11 +811,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -1523,11 +1523,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -1919,11 +1919,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index e03a0ef48dfda338abac7b9902587ff66d73c1fd..be7bb6e5781d120f661c6dba998ba0f47770f304 100644 (file)
@@ -277,7 +277,8 @@ nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
@@ -561,7 +562,8 @@ nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
index 754f61ed6a6fcb2b6927b7e43689feba25a96945..6673a44eb2aec143565a530d82ee90e6fdae1598 100644 (file)
@@ -413,7 +413,8 @@ nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
@@ -909,7 +910,8 @@ nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* EWALD ELECTROSTATICS */
 
index b596997fa0817e66f977e2bf38ccb4c1ff4eda00..5189a947bff21b54142f08a805403015efffa1f3 100644 (file)
@@ -445,7 +445,8 @@ nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
              **************************/
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -995,7 +996,8 @@ nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
              **************************/
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index 0b371e50b7589b1e435e205809d8defcee417433..141f61f8243dd24fbab18a4ae2883e5cf745bc03 100644 (file)
@@ -329,7 +329,8 @@ nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -716,7 +717,8 @@ nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index e4740b92f178608862f3397bf0cc48c2c1dfd0f2..b9c8f053759009a6351bde2d037f8e83989cccfa 100644 (file)
@@ -300,7 +300,8 @@ nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
             isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
@@ -630,7 +631,8 @@ nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
             isaprod          = _fjsp_mul_v2r8(isai0,isaj0);
index 002a7f36d600685929e146c925a80fa3475b2d68..8547907a362f257b2d94dec45ceeda60ecd742e2 100644 (file)
@@ -271,7 +271,8 @@ nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -563,7 +564,8 @@ nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 74146bbff009c34ffa89c426b652181aa9e1a827..c096e05f1b5c9b8e054729de9b82a659e68ce993 100644 (file)
@@ -211,11 +211,11 @@ nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -279,19 +279,21 @@ nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* Analytical LJ-PME */
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
             vvdw             = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
                                _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
@@ -505,11 +507,11 @@ nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -568,19 +570,21 @@ nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* Analytical LJ-PME */
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 1328f231c3e33cbb7460957297420e21f45d271a..6c52337bce5fc4f85462c6990d7098d0ea571091 100644 (file)
@@ -201,11 +201,11 @@ nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -258,19 +258,21 @@ nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* Analytical LJ-PME */
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -466,11 +468,11 @@ nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -520,19 +522,21 @@ nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* Analytical LJ-PME */
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index c23fe6ee27764bbc79c8982569dcec5ed3b409f9..638ef0abb58e0dbaf43502a65b8901fcf6f91116 100644 (file)
@@ -253,7 +253,8 @@ nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
             {
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -505,7 +506,8 @@ nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
             {
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index 64bf5ad01cce1e72de2b70cd299491710f1089ff..515cac9eba6b504c96e9e38db526e080c260d8b5 100644 (file)
@@ -282,7 +282,8 @@ nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -576,7 +577,8 @@ nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index 61c4e883513b8696807350ea04c3d2b81dde0a02..b62a36499ee0a721dd4fe092d81a123019dafc4e 100644 (file)
@@ -232,7 +232,8 @@ nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
              **************************/
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -457,7 +458,8 @@ nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
              **************************/
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index 979df716bc0fb9cfd80724c78389182d889bd4ab..e4367156769bc86d64093441ed2ccfc876179b6e 100644 (file)
@@ -312,7 +312,8 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -655,7 +656,8 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 3e63f540457656f6bdbad12f6556ff043f852462..c7fa1728ea48d79ae3db01fe3ef14144f163c389 100644 (file)
@@ -434,7 +434,8 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -969,7 +970,8 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index c99ca20f97fb5c3418b7a8392957d7423b34f2f9..495a32a634549e946dfdb35e583d8c7a8c833cd7 100644 (file)
@@ -464,7 +464,8 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -1052,7 +1053,8 @@ nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 1d1a34200d2deaac43bad50f89af180c2fba20fb..95c1bf4946b8f1ac0e2c0549568c93c989e6d72f 100644 (file)
@@ -277,7 +277,8 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@ -557,7 +558,8 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index 7743a69904257ae429b9db1dbbd7752f24ecc2f8..83148ea8666e9610d92b23dde1b56cc6743a09be 100644 (file)
@@ -399,7 +399,8 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@ -871,7 +872,8 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index 0f9b3421192e80d527a19a199af465b41a302f03..1b908815ef9a31953319ae33629a6214516908b7 100644 (file)
@@ -442,7 +442,8 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double
             {
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -986,7 +987,8 @@ nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double
             {
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index 334a041e6d2355c4e73af6a52d43b4b1d22386d7..55b463e8e77b3416154646be5a4d59325287072d 100644 (file)
@@ -302,7 +302,8 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@ -620,7 +621,8 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index cc18c0aa47ccf734638a53a70b3a4420190838b4..680a0df90edef7d448e5234a6a155cfa10dae838 100644 (file)
@@ -424,7 +424,8 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@ -934,7 +935,8 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index cf9c1edbb31d3e776e5ad0e58996c72ab33a5b9d..ded5f63a03f6b350d108561d473507c060065754 100644 (file)
@@ -469,7 +469,8 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -1053,7 +1054,8 @@ nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index 4bedaed7d9fae6e8656583a66cfaf846411aa920..0b70695a78d7408dcddec8ece6a3c03461f64628 100644 (file)
@@ -293,7 +293,8 @@ nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -611,7 +612,8 @@ nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index fdcc647dfafd04d3366e775783451e6485d81bde..4e13180e65747768d9c6f476b2b740d0548a879c 100644 (file)
@@ -395,7 +395,8 @@ nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -867,7 +868,8 @@ nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index c7fa4c6f074ef31e14f5bbc464f1566b02b21d5c..ad8da6d53710d93f5804f7f5a01596e5e4f76c46 100644 (file)
@@ -429,7 +429,8 @@ nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
@@ -955,7 +956,8 @@ nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* Calculate table index by multiplying r with table scale and truncate to integer */
             rt               = _fjsp_mul_v2r8(r00,vftabscale);
index 7d497dab08ee7fb1c85d2d55b310e97146845308..24b01bae5f5ddae297619eb6b283df389c1e0383 100644 (file)
@@ -254,7 +254,8 @@ nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@ -505,7 +506,8 @@ nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index c3524155aa1fd8b0089d1da08d0e0bfe48091a67..00fcc3d7133c687106cffc8bb1fe54bb1f037610 100644 (file)
@@ -356,7 +356,8 @@ nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             velec            = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
@@ -761,7 +762,8 @@ nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
 
             /* Compute parameters for interactions between i and j atoms */
             qq00             = _fjsp_mul_v2r8(iq0,jq0);
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* REACTION-FIELD ELECTROSTATICS */
             felec            = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
index e9e38c2b4c881ab2e5ffe1e2490f31458f6ad5bc..7df06226d85858006cee842adb397edc3cadf4dc 100644 (file)
@@ -390,7 +390,8 @@ nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
              **************************/
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
@@ -849,7 +850,8 @@ nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
              **************************/
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
             /* LENNARD-JONES DISPERSION/REPULSION */
 
index 07fed742c850b5e5ee3064f59953cf7077bc4e75..2fe3d394922d36a8d08ad29bdd041a94683f9c46 100644 (file)
@@ -543,20 +543,12 @@ void
             /*             #define INNERFLOPS INNERFLOPS+1 */
             /*         #endif */
             /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
-            /*             #if ROUND == 'Loop' */
             gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset{I}+vdwjidx{J}A,
                                          vdwparam+vdwioffset{I}+vdwjidx{J}B,&c6_{I}{J},&c12_{I}{J});
 
-           /*                 #if 'LJEwald' in KERNEL_VDW */
+           /*             #if 'LJEwald' in KERNEL_VDW */
             c6grid_{I}{J}       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset{I}+vdwjidx{J}A,
                                                                    vdwgridparam+vdwioffset{I}+vdwjidx{J}B);
-            /*                 #endif */
-            /*             #else */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset{I}+vdwjidx{J}A,&c6_{I}{J},&c12_{I}{J});
-
-            /*                 #if 'LJEwald' in KERNEL_VDW */
-            c6grid_{I}{J}       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset{I}+vdwjidx{J}A);
-            /*                 #endif */
             /*             #endif */
             /*         #endif */
             /*     #endif */
@@ -858,13 +850,13 @@ void
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq{I}{J},rinvsq{I}{J}),rinvsq{I}{J});
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq{I}{J});
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /*                 #define INNERFLOPS INNERFLOPS+9 */
             /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch' */
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_{I}{J},_fjsp_sub_v2r8(one,poly),c6_{I}{J}),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_{I}{J},_fjsp_sub_v2r8(poly,one),c6_{I}{J}),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_{I}{J},_fjsp_mul_v2r8(rinvsix,rinvsix));
             /*                 #define INNERFLOPS INNERFLOPS+5 */
             /*                 #if KERNEL_MOD_VDW=='PotentialShift' */
@@ -883,7 +875,7 @@ void
             /*                  #endif */
             /*              #elif KERNEL_VF=='Force' */
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_{I}{J},_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_{I}{J},_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_{I}{J},one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
index 1dfa19a18eeda552bcc9d15673655f68352c83c8..7fcd21cd9f92c25ea1cd5faafe938c1793414ccd 100644 (file)
@@ -1006,7 +1006,7 @@ void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts,
         sprintf(err_buf, "tau-p must be > 0 instead of %g\n", ir->tau_p);
         CHECK(ir->tau_p <= 0);
 
-        if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc))
+        if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc) - 10*GMX_REAL_EPS)
         {
             sprintf(warn_buf, "For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)",
                     EPCOUPLTYPE(ir->epc), ir->tau_p, pcouple_min_integration_steps(ir->epc), dt_pcoupl);
@@ -3281,7 +3281,7 @@ void do_index(const char* mdparin, const char *ndx,
         nstcmin = tcouple_min_integration_steps(ir->etc);
         if (nstcmin > 1)
         {
-            if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
+            if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin - 10*GMX_REAL_EPS)
             {
                 sprintf(warn_buf, "For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
                         ETCOUPLTYPE(ir->etc),
index 7b3fff08a2b346bacd978bd7dd02a5b2cd614948..9c4da0eb1b8c945e31eb46a52975b60135b32aa9 100644 (file)
@@ -38,6 +38,7 @@
 
 #include "config.h"
 
+#include <assert.h>
 #include <math.h>
 #include <string.h>
 
@@ -87,12 +88,7 @@ void pr_alloc (int extra, t_params *pr)
     {
         return;
     }
-    if ((pr->nr == 0) && (pr->param != NULL))
-    {
-        fprintf(stderr, "Warning: dangling pointer at %lx\n",
-                (unsigned long)pr->param);
-        pr->param = NULL;
-    }
+    assert(!((pr->nr == 0) && (pr->param != NULL)));
     if (pr->nr+extra > pr->maxnr)
     {
         pr->maxnr = max(1.2*pr->maxnr, pr->maxnr + extra);
diff --git a/src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h b/src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h
new file mode 100644 (file)
index 0000000..d4a4905
--- /dev/null
@@ -0,0 +1,432 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_SPARC64_HPC_ACE_H
+#define GMX_SIMD_IMPL_SPARC64_HPC_ACE_H
+
+#include <math.h>
+/* Fujitsu header borrows the name from SSE2, since some instructions have aliases */
+#include <emmintrin.h>
+
+
+/* Sparc64 HPC-ACE SIMD instruction wrappers
+ *
+ * Please see documentation in gromacs/simd/simd.h for defines.
+ */
+
+/* Capability definitions for Sparc64 HPC-ACE */
+/* HPC-ACE is actually double-only on the register level, but we also implement
+ * a single-precision interface where we only offer single-precision accuracy
+ * in math functions - this can save quite a few cycles.
+ */
+#define GMX_SIMD_HAVE_FLOAT
+#define GMX_SIMD_HAVE_DOUBLE
+#define GMX_SIMD_HAVE_HARDWARE
+#undef  GMX_SIMD_HAVE_LOADU
+#undef  GMX_SIMD_HAVE_STOREU
+#define GMX_SIMD_HAVE_LOGICAL
+#define GMX_SIMD_HAVE_FMA
+#undef  GMX_SIMD_HAVE_FRACTION
+#define GMX_SIMD_HAVE_FINT32
+#define GMX_SIMD_HAVE_FINT32_EXTRACT
+#define GMX_SIMD_HAVE_FINT32_LOGICAL
+#undef  GMX_SIMD_HAVE_FINT32_ARITHMETICS
+#define GMX_SIMD_HAVE_DINT32
+#define GMX_SIMD_HAVE_DINT32_EXTRACT
+#define GMX_SIMD_HAVE_DINT32_LOGICAL
+#undef  GMX_SIMD_HAVE_DINT32_ARITHMETICS
+#undef  GMX_SIMD4_HAVE_FLOAT
+#undef  GMX_SIMD4_HAVE_DOUBLE
+
+/* Implementation details */
+#define GMX_SIMD_FLOAT_WIDTH         2
+#define GMX_SIMD_DOUBLE_WIDTH        2
+#define GMX_SIMD_FINT32_WIDTH        2
+#define GMX_SIMD_DINT32_WIDTH        2
+#define GMX_SIMD_RSQRT_BITS         10
+#define GMX_SIMD_RCP_BITS            9
+
+/* HPC-ACE is a bit strange; some instructions like
+ * shifts only work on _integer_ versions of SIMD
+ * registers, but there are no intrinsics to load
+ * or convert, or even to cast. The only way to use
+ * them is to declare unions with the SIMD integer
+ * type. However, this will lead to extra load ops,
+ * and the normal real-to-int and int-to-real
+ * conversions work purely on the v2r8 fp regs.
+ * Since our most common usage is to convert and
+ * then extract the result for table lookups, we
+ * define the gmx_simd_fint32_t datatype to use
+ * the v2r8 rather than v2i8 SIMD type.
+ */
+
+/****************************************************
+ *      SINGLE PRECISION SIMD IMPLEMENTATION        *
+ ****************************************************/
+#define gmx_simd_float_t          _fjsp_v2r8
+#define gmx_simd_load_f           gmx_simd_load_f_sparc64_hpc_ace
+#define gmx_simd_load1_f(m)       _fjsp_set_v2r8((*m), (*m))
+#define gmx_simd_set1_f(a)        _fjsp_set_v2r8(a, a)
+#define gmx_simd_store_f          gmx_simd_store_f_sparc64_hpc_ace
+#define gmx_simd_loadu_f          gmx_simd_load_f
+/* No unaligned store of gmx_simd_float_t */
+#define gmx_simd_setzero_f        _fjsp_setzero_v2r8
+#define gmx_simd_add_f            _fjsp_add_v2r8
+#define gmx_simd_sub_f            _fjsp_sub_v2r8
+#define gmx_simd_mul_f            _fjsp_mul_v2r8
+#define gmx_simd_fmadd_f(a, b, c)   _fjsp_madd_v2r8(a, b, c)
+#define gmx_simd_fmsub_f(a, b, c)   _fjsp_msub_v2r8(a, b, c)
+#define gmx_simd_fnmadd_f(a, b, c)  _fjsp_nmsub_v2r8(a, b, c)
+#define gmx_simd_fnmsub_f(a, b, c)  _fjsp_nmadd_v2r8(a, b, c)
+#define gmx_simd_and_f            _fjsp_and_v2r8
+#define gmx_simd_andnot_f         _fjsp_andnot1_v2r8
+#define gmx_simd_or_f             _fjsp_or_v2r8
+#define gmx_simd_xor_f            _fjsp_xor_v2r8
+#define gmx_simd_rsqrt_f          _fjsp_rsqrta_v2r8
+#define gmx_simd_rcp_f            _fjsp_rcpa_v2r8
+#define gmx_simd_fabs_f(x)        _fjsp_abs_v2r8(x)
+#define gmx_simd_fneg_f(x)        _fjsp_neg_v2r8(x)
+#define gmx_simd_max_f            _fjsp_max_v2r8
+#define gmx_simd_min_f            _fjsp_min_v2r8
+#define gmx_simd_round_f(x)       gmx_simd_round_d(x)
+#define gmx_simd_trunc_f(x)       gmx_simd_trunc_d(x)
+#define gmx_simd_fraction_f(x)    gmx_simd_sub_f(x, gmx_simd_trunc_f(x))
+#define gmx_simd_get_exponent_f   gmx_simd_get_exponent_d_sparc64_hpc_ace
+#define gmx_simd_get_mantissa_f   gmx_simd_get_mantissa_d_sparc64_hpc_ace
+#define gmx_simd_set_exponent_f   gmx_simd_set_exponent_d_sparc64_hpc_ace
+/* integer datatype corresponding to float: gmx_simd_fint32_t */
+#define gmx_simd_fint32_t         _fjsp_v2r8
+#define gmx_simd_load_fi(m)       gmx_simd_load_di_sparc64_hpc_ace(m)
+#define gmx_simd_set1_fi(i)       gmx_simd_set1_di_sparc64_hpc_ace(i)
+#define gmx_simd_store_fi(m, x)   gmx_simd_store_di_sparc64_hpc_ace(m, x)
+#define gmx_simd_loadu_fi         gmx_simd_load_fi
+/* No unaligned store of gmx_simd_fint32_t */
+#define gmx_simd_setzero_fi       _fjsp_setzero_v2r8
+#define gmx_simd_cvt_f2i          gmx_simd_cvt_d2i
+#define gmx_simd_cvtt_f2i         _fjsp_dtox_v2r8
+#define gmx_simd_cvt_i2f          _fjsp_xtod_v2r8
+#define gmx_simd_extract_fi      gmx_simd_extract_di_sparc64_hpc_ace
+/* Integer logical ops on gmx_simd_fint32_t */
+/* Shifts are horrible since they require memory re-loads. */
+#define gmx_simd_slli_fi          gmx_simd_slli_di_sparc64_hpc_ace
+#define gmx_simd_srli_fi          gmx_simd_srli_di_sparc64_hpc_ace
+#define gmx_simd_and_fi           _fjsp_and_v2r8
+#define gmx_simd_andnot_fi(a, b)   _fjsp_andnot1_v2r8(a, b)
+#define gmx_simd_or_fi            _fjsp_or_v2r8
+#define gmx_simd_xor_fi           _fjsp_xor_v2r8
+/* No integer arithmetic ops on gmx_simd_fint32_t */
+/* Boolean & comparison operations on gmx_simd_float_t */
+#define gmx_simd_fbool_t          _fjsp_v2r8
+#define gmx_simd_cmpeq_f          _fjsp_cmpeq_v2r8
+#define gmx_simd_cmplt_f          _fjsp_cmplt_v2r8
+#define gmx_simd_cmple_f          _fjsp_cmple_v2r8
+#define gmx_simd_and_fb           _fjsp_and_v2r8
+#define gmx_simd_or_fb            _fjsp_or_v2r8
+#define gmx_simd_anytrue_fb       gmx_simd_anytrue_d_sparc64_hpc_ace
+#define gmx_simd_blendzero_f      _fjsp_and_v2r8
+#define gmx_simd_blendnotzero_f(a, sel) _fjsp_andnot1_v2r8(sel, a)
+#define gmx_simd_blendv_f(a, b, s) _fjsp_selmov_v2r8(b, a, s)
+#define gmx_simd_reduce_f(a)       gmx_simd_reduce_d_sparc64_hpc_ace(a)
+/* No boolean & comparison operations on gmx_simd_fint32_t */
+/* No conversions between different booleans */
+
+/****************************************************
+ *      DOUBLE PRECISION SIMD IMPLEMENTATION        *
+ ****************************************************/
+#define gmx_simd_double_t          _fjsp_v2r8
+#define gmx_simd_load_d            _fjsp_load_v2r8
+#define gmx_simd_load1_d(m)        _fjsp_set_v2r8((*m), (*m))
+#define gmx_simd_set1_d(a)         _fjsp_set_v2r8(a, a)
+#define gmx_simd_store_d           _fjsp_store_v2r8
+#define gmx_simd_loadu_d           gmx_simd_load_d
+/* No unaligned store of gmx_simd_double_t */
+#define gmx_simd_setzero_d         _fjsp_setzero_v2r8
+#define gmx_simd_add_d             _fjsp_add_v2r8
+#define gmx_simd_sub_d             _fjsp_sub_v2r8
+#define gmx_simd_mul_d             _fjsp_mul_v2r8
+#define gmx_simd_fmadd_d(a, b, c)   _fjsp_madd_v2r8(a, b, c)
+#define gmx_simd_fmsub_d(a, b, c)   _fjsp_msub_v2r8(a, b, c)
+#define gmx_simd_fnmadd_d(a, b, c)  _fjsp_nmsub_v2r8(a, b, c)
+#define gmx_simd_fnmsub_d(a, b, c)  _fjsp_nmadd_v2r8(a, b, c)
+#define gmx_simd_and_d             _fjsp_and_v2r8
+#define gmx_simd_andnot_d          _fjsp_andnot1_v2r8
+#define gmx_simd_or_d              _fjsp_or_v2r8
+#define gmx_simd_xor_d             _fjsp_xor_v2r8
+#define gmx_simd_rsqrt_d(x)        _fjsp_rsqrta_v2r8(x)
+#define gmx_simd_rcp_d(x)          _fjsp_rcpa_v2r8(x)
+#define gmx_simd_fabs_d(x)         _fjsp_abs_v2r8(x)
+#define gmx_simd_fneg_d(x)         _fjsp_neg_v2r8(x)
+#define gmx_simd_max_d             _fjsp_max_v2r8
+#define gmx_simd_min_d             _fjsp_min_v2r8
+#define gmx_simd_round_d(x)        gmx_simd_cvt_i2d(gmx_simd_cvt_d2i(x))
+#define gmx_simd_trunc_d(x)        gmx_simd_cvt_i2d(gmx_simd_cvtt_d2i(x))
+#define gmx_simd_fraction_d(x)     gmx_simd_sub_d(x, gmx_simd_trunc_d(x))
+#define gmx_simd_get_exponent_d    gmx_simd_get_exponent_d_sparc64_hpc_ace
+#define gmx_simd_get_mantissa_d    gmx_simd_get_mantissa_d_sparc64_hpc_ace
+#define gmx_simd_set_exponent_d    gmx_simd_set_exponent_d_sparc64_hpc_ace
+/* integer datatype corresponding to double: gmx_simd_dint32_t */
+#define gmx_simd_dint32_t          _fjsp_v2r8
+#define gmx_simd_load_di(m)        gmx_simd_load_di_sparc64_hpc_ace(m)
+#define gmx_simd_set1_di(i)        gmx_simd_set1_di_sparc64_hpc_ace(i)
+#define gmx_simd_store_di(m, x)    gmx_simd_store_di_sparc64_hpc_ace(m, x)
+#define gmx_simd_loadu_di          gmx_simd_load_di
+/* No unaligned store of gmx_simd_dint32_t */
+#define gmx_simd_setzero_di        _fjsp_setzero_v2r8
+#define gmx_simd_cvt_d2i           gmx_simd_cvt_d2i_sparc64_hpc_ace
+#define gmx_simd_cvtt_d2i          _fjsp_dtox_v2r8
+#define gmx_simd_cvt_i2d           _fjsp_xtod_v2r8
+#define gmx_simd_extract_di        gmx_simd_extract_di_sparc64_hpc_ace
+/* Integer logical ops on gmx_simd_dint32_t */
+#define gmx_simd_slli_di           gmx_simd_slli_di_sparc64_hpc_ace
+#define gmx_simd_srli_di           gmx_simd_srli_di_sparc64_hpc_ace
+#define gmx_simd_and_di            _fjsp_and_v2r8
+#define gmx_simd_andnot_di         _fjsp_andnot1_v2r8
+#define gmx_simd_or_di             _fjsp_or_v2r8
+#define gmx_simd_xor_di            _fjsp_xor_v2r8
+/* No integer arithmetic ops on integer datatype corresponding to double */
+/* Boolean & comparison operations on gmx_simd_double_t */
+#define gmx_simd_dbool_t           _fjsp_v2r8
+#define gmx_simd_cmpeq_d           _fjsp_cmpeq_v2r8
+#define gmx_simd_cmplt_d           _fjsp_cmplt_v2r8
+#define gmx_simd_cmple_d           _fjsp_cmple_v2r8
+#define gmx_simd_and_db            _fjsp_and_v2r8
+#define gmx_simd_or_db             _fjsp_or_v2r8
+#define gmx_simd_anytrue_db         gmx_simd_anytrue_d_sparc64_hpc_ace
+#define gmx_simd_blendzero_d        _fjsp_and_v2r8
+#define gmx_simd_blendnotzero_d(a, sel)  _fjsp_andnot1_v2r8(sel, a)
+#define gmx_simd_blendv_d(a, b, sel) _fjsp_selmov_v2r8(b, a, sel)
+#define gmx_simd_reduce_d(a)        gmx_simd_reduce_d_sparc64_hpc_ace(a)
+
+/* No boolean & comparison operations on gmx_simd_dint32_t */
+/* Float/double conversion */
+#define gmx_simd_cvt_f2d(f)         (f)
+#define gmx_simd_cvt_d2f(d)         (d)
+
+
+/****************************************************
+ * SINGLE PRECISION IMPLEMENTATION HELPER FUNCTIONS *
+ ****************************************************/
+static gmx_inline gmx_simd_float_t
+gmx_simd_load_f_sparc64_hpc_ace(const float *m)
+{
+    /* Load a SIMD float variable from the floats stored at m.
+     *
+     * We are not allowed to cast single-to-double registers, but we can
+     * masquerade the memory location as a variable of type _fjsp_v2r4.
+     * The value read through that pointer is then handed to
+     * _fjsp_stod_v2r8(), whose result is returned (presumably a
+     * single-to-double widening, judging by the name - confirm against
+     * the Fujitsu intrinsics documentation).
+     *
+     * NOTE(review): the pointer cast assumes m is suitably aligned for a
+     * _fjsp_v2r4 access - confirm with callers.
+     */
+    const _fjsp_v2r4 *p = (const _fjsp_v2r4 *)m;
+    _fjsp_v2r4        simd;
+
+    simd = *p;
+    return _fjsp_stod_v2r8(simd);
+}
+
+static gmx_inline void
+gmx_simd_store_f_sparc64_hpc_ace(float *m, gmx_simd_float_t x)
+{
+    /* Store the SIMD float variable x to the floats at m.
+     *
+     * We are not allowed to cast single-to-double registers, but we can
+     * masquerade the memory location as a variable of type _fjsp_v2r4.
+     * x is passed through _fjsp_dtos_v2r4() (presumably a double-to-single
+     * narrowing, judging by the name - confirm against the Fujitsu
+     * intrinsics documentation) and the result is written through that
+     * pointer.
+     *
+     * NOTE(review): the pointer cast assumes m is suitably aligned for a
+     * _fjsp_v2r4 access - confirm with callers.
+     */
+    _fjsp_v2r4 *p = (_fjsp_v2r4 *)m;
+    *p = _fjsp_dtos_v2r4(x);
+}
+
+static gmx_inline gmx_simd_dint32_t
+gmx_simd_load_di_sparc64_hpc_ace(const int *m)
+{   /* Load the two ints m[0] and m[1] into a gmx_simd_dint32_t.
+     *
+     * Each int is assigned to a long long element of a union that overlays
+     * a _fjsp_v2r8, so the values are widened by the normal C integer
+     * conversion. The union storage is then read back with
+     * _fjsp_load_v2r8() and returned; no register cast is involved.
+     */
+    union
+    {
+        _fjsp_v2r8       simd;
+        long long int    i[2];
+    }
+    conv;
+
+    conv.i[0] = m[0];
+    conv.i[1] = m[1];
+
+    return _fjsp_load_v2r8( (double *) &(conv.simd) );
+}
+
+static gmx_inline void
+gmx_simd_store_di_sparc64_hpc_ace(int *m, gmx_simd_dint32_t x)
+{   /* Store the two elements of the gmx_simd_dint32_t x to m[0] and m[1].
+     *
+     * x is written with _fjsp_store_v2r8() into a union that overlays a
+     * _fjsp_v2r8 with two long long values; those values are then assigned
+     * to the int destinations, i.e. narrowed by the normal C integer
+     * conversion.
+     */
+    union
+    {
+        _fjsp_v2r8       simd;
+        long long int    i[2];
+    }
+    conv;
+
+    _fjsp_store_v2r8( (double *) &(conv.simd), x );
+
+    m[0] = conv.i[0];
+    m[1] = conv.i[1];
+}
+
+static gmx_inline gmx_simd_dint32_t
+gmx_simd_set1_di_sparc64_hpc_ace(int i)
+{   /* Return a gmx_simd_dint32_t with both elements set to i.
+     *
+     * i is assigned to both long long elements of a union that overlays a
+     * _fjsp_v2r8, and the union storage is read back with
+     * _fjsp_load_v2r8().
+     */
+    union
+    {
+        _fjsp_v2r8       simd;
+        long long int    i[2];
+    }
+    conv;
+
+    conv.i[0] = i;
+    conv.i[1] = i;
+
+    return _fjsp_load_v2r8( (double *) &(conv.simd) );
+}
+
+static gmx_inline int
+gmx_simd_extract_di_sparc64_hpc_ace(gmx_simd_dint32_t x, int i)
+{   /* Return one element of x as an int.
+     *
+     * i == 0 selects the element written by _fjsp_storel_v2r8(); any other
+     * value of i selects the one written by _fjsp_storeh_v2r8()
+     * (presumably the low and high element respectively, judging by the
+     * names - confirm against the Fujitsu intrinsics documentation).
+     * The element is stored into a long long and cast to int on return.
+     */
+    long long int res;
+    /* This conditional should be optimized away at compile time when the
+     * index i is a compile-time constant at the (inlined) call site.
+     */
+    if (i == 0)
+    {
+        _fjsp_storel_v2r8((double *)&res, x);
+    }
+    else
+    {
+        _fjsp_storeh_v2r8((double *)&res, x);
+    }
+    return (int)res;
+}
+
+static gmx_inline gmx_simd_dint32_t
+gmx_simd_slli_di_sparc64_hpc_ace(gmx_simd_dint32_t x, int i)
+{   /* Apply _fjsp_slli_v2i8() with shift count i to x and return the result
+     * (presumably a left shift of each integer element - confirm against
+     * the Fujitsu intrinsics documentation).
+     *
+     * gmx_simd_dint32_t is a _fjsp_v2r8, so the variable is reinterpreted
+     * as _fjsp_v2i8 through a pointer cast before the shift and back to
+     * _fjsp_v2r8 afterwards; the bits themselves are not converted.
+     */
+    _fjsp_v2i8 ix = *((_fjsp_v2i8 *)&x);
+    ix = _fjsp_slli_v2i8(ix, i);
+    x  = *((_fjsp_v2r8 *)&ix);
+    return x;
+}
+
+static gmx_inline gmx_simd_dint32_t
+gmx_simd_srli_di_sparc64_hpc_ace(gmx_simd_dint32_t x, int i)
+{   /* Apply _fjsp_srli_v2i8() with shift count i to x and return the result
+     * (presumably a right shift of each integer element - confirm against
+     * the Fujitsu intrinsics documentation).
+     *
+     * gmx_simd_dint32_t is a _fjsp_v2r8, so the variable is reinterpreted
+     * as _fjsp_v2i8 through a pointer cast before the shift and back to
+     * _fjsp_v2r8 afterwards; the bits themselves are not converted.
+     */
+    _fjsp_v2i8 ix = *((_fjsp_v2i8 *)&x);
+    ix = _fjsp_srli_v2i8(ix, i);
+    x  = *((_fjsp_v2r8 *)&ix);
+    return x;
+}
+
+static gmx_inline gmx_simd_dint32_t
+gmx_simd_cvt_d2i_sparc64_hpc_ace(gmx_simd_double_t x)
+{   /* Convert x to integers with rounding rather than truncation.
+     *
+     * A value of 0.5 carrying the sign of x is added to x, and the sum is
+     * converted with _fjsp_dtox_v2r8(), the intrinsic this header also
+     * uses for the truncating conversion gmx_simd_cvtt_d2i. Assuming that
+     * intrinsic truncates toward zero, exact halves end up rounded away
+     * from zero.
+     */
+    _fjsp_v2r8 signbit = _fjsp_set_v2r8(-0.0, -0.0); /* only the sign bit set */
+    _fjsp_v2r8 half    = _fjsp_set_v2r8(0.5, 0.5);
+
+    /* (signbit & x) | half is 0.5 with the sign of x */
+    x = _fjsp_add_v2r8(x, _fjsp_or_v2r8(_fjsp_and_v2r8(signbit, x), half));
+    return _fjsp_dtox_v2r8(x);
+}
+
+static gmx_inline int
+gmx_simd_anytrue_d_sparc64_hpc_ace(gmx_simd_dbool_t x)
+{   /* Return 1 if any bit is set in either element of x, otherwise 0.
+     *
+     * x is OR-ed with _fjsp_unpackhi_v2r8(x, x) (presumably a register
+     * holding the high element in both positions - confirm against the
+     * Fujitsu intrinsics documentation), so a single element of the result
+     * combines both inputs. That element is stored with
+     * _fjsp_storel_v2r8() into a long long and compared against zero as an
+     * integer bit pattern.
+     */
+    long long int i;
+    x = _fjsp_or_v2r8(x, _fjsp_unpackhi_v2r8(x, x));
+    _fjsp_storel_v2r8((double *)&i, x);
+    return (i != 0LL);
+}
+
+static gmx_inline double
+gmx_simd_reduce_d_sparc64_hpc_ace(gmx_simd_double_t x)
+{   /* Return the sum of the two elements of x as a scalar double.
+     *
+     * x is added to _fjsp_unpackhi_v2r8(x, x) (presumably a register
+     * holding the high element in both positions - confirm against the
+     * Fujitsu intrinsics documentation), and the element written by
+     * _fjsp_storel_v2r8() is returned.
+     */
+    double d;
+    x = _fjsp_add_v2r8(x, _fjsp_unpackhi_v2r8(x, x));
+    _fjsp_storel_v2r8(&d, x);
+    return d;
+}
+
+
+static gmx_inline gmx_simd_double_t
+gmx_simd_get_exponent_d_sparc64_hpc_ace(gmx_simd_double_t x)
+{
+    /* Return the unbiased binary exponent of each element of x, as a
+     * floating-point value.
+     *
+     * HPC-ACE cannot cast _fjsp_v2r8 to _fjsp_v4i4, so to perform shifts we
+     * would need to store and reload. Since we are only operating on two
+     * numbers it is likely more efficient to do the operations directly on
+     * normal registers.
+     *
+     * Per element: mask out the exponent field (expmask), shift it down by
+     * the 52 mantissa bits, and subtract the bias (expbias). The resulting
+     * integers are reloaded into a SIMD variable and converted with
+     * _fjsp_xtod_v2r8(), the intrinsic this header uses for
+     * gmx_simd_cvt_i2d.
+     */
+    const gmx_int64_t    expmask   = 0x7ff0000000000000LL;
+    const gmx_int64_t    expbias   = 1023LL;
+
+    union
+    {
+        _fjsp_v2r8       simd;
+        long long int    i[2];
+    }
+    conv;
+
+    _fjsp_store_v2r8( (double *) &conv.simd, x);
+    conv.i[0] = ((conv.i[0] & expmask) >> 52) - expbias;
+    conv.i[1] = ((conv.i[1] & expmask) >> 52) - expbias;
+    x         = _fjsp_load_v2r8( (double *) &conv.simd);
+    return _fjsp_xtod_v2r8(x);
+}
+
+static gmx_inline gmx_simd_double_t
+gmx_simd_get_mantissa_d_sparc64_hpc_ace(gmx_simd_double_t x)
+{   /* Return the mantissa of each element of x, placed on the exponent
+     * of 1.0.
+     *
+     * mantmask keeps only the low 52 bits of each element, which clears
+     * the sign and exponent fields. OR-ing the result with 1.0 then
+     * supplies the exponent bits of 1.0, so the returned bit pattern is
+     * that of 1.0 with the mantissa bits of x.
+     */
+    gmx_int64_t       mantmask[2] = {0x000fffffffffffffLL, 0x000fffffffffffffLL};
+    gmx_simd_double_t one         = _fjsp_set_v2r8(1.0, 1.0);
+
+    x = _fjsp_and_v2r8(x, _fjsp_load_v2r8((double *)mantmask));
+    return _fjsp_or_v2r8(x, one);
+}
+
+static gmx_inline gmx_simd_double_t
+gmx_simd_set_exponent_d_sparc64_hpc_ace(gmx_simd_double_t x)
+{   /* Build, for each element of x, the double whose exponent field is the
+     * rounded value of x plus the bias and whose other bits are zero.
+     *
+     * x is first converted to integers with
+     * gmx_simd_cvt_d2i_sparc64_hpc_ace() and stored to a union. Each
+     * integer then has expbias added and is shifted left by the 52
+     * mantissa bits, which places it in the exponent field. The union
+     * storage is reloaded as a SIMD double and returned.
+     *
+     * NOTE(review): no range check is done here, so the result is only
+     * meaningful while the rounded value plus expbias fits the exponent
+     * field - confirm what callers guarantee.
+     */
+    const gmx_int64_t    expbias   = 1023;
+    union
+    {
+        _fjsp_v2r8       simd;
+        long long int    i[2];
+    }
+    conv;
+
+
+    _fjsp_store_v2r8( (double *) &conv.simd, gmx_simd_cvt_d2i_sparc64_hpc_ace(x));
+    conv.i[0] = (conv.i[0] + expbias) << 52;
+    conv.i[1] = (conv.i[1] + expbias) << 52;
+
+    return _fjsp_load_v2r8( (double *) &conv.simd);
+}
+
+
+/* No SIMD4 support, since both single & double are only 2-wide */
+
+#endif /* GMX_SIMD_IMPL_SPARC64_HPC_ACE_H */
index f7c73eb7a4a14e74b2edc62bffa43d5005b3e78a..49ca593f50c3570667a166297099236b5d3622fa 100644 (file)
@@ -125,6 +125,8 @@ static gmx_inline double * gmx_simd4_align_d(double *p);
 #    include "gromacs/simd/impl_x86_sse2/impl_x86_sse2.h"
 #elif defined GMX_SIMD_IBM_QPX
 #    include "gromacs/simd/impl_ibm_qpx/impl_ibm_qpx.h"
+#elif defined GMX_SIMD_SPARC64_HPC_ACE
+#    include "gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h"
 #elif (defined GMX_SIMD_REFERENCE) || (defined DOXYGEN)
 /* Plain C SIMD reference implementation, also serves as documentation.
  * For now this code path will also be taken for Sparc64_HPC_ACE since we have
index 81121cc3d6385fd5a903e616515fbc3ed1168fac..c8429f729f8eeee81fa090dc602ffdec2bb20e68 100644 (file)
@@ -80,21 +80,6 @@ const gmx_simd_real_t rSimd_Exp      = setSimdRealFrom3R( 1.40552351710274526239
 const gmx_simd_real_t rSimd_ExpDouble = setSimdRealFrom3R( 6.287393598732017379054414e+176,
                                                            8.794495252903116023030553e-140,
                                                            -3.637060701570496477655022e+202);
-// Magic FP numbers corresponding to specific bit patterns
-const gmx_simd_real_t rSimd_Bits1    = setSimdRealFrom1R(-1.07730874267432137e+236);
-const gmx_simd_real_t rSimd_Bits2    = setSimdRealFrom1R(-9.25596313493178307e+061);
-const gmx_simd_real_t rSimd_Bits3    = setSimdRealFrom1R(-8.57750588235293981e+003);
-const gmx_simd_real_t rSimd_Bits4    = setSimdRealFrom1R( 1.22416778341839096e-250);
-const gmx_simd_real_t rSimd_Bits5    = setSimdRealFrom1R(-1.15711777004554095e+294);
-const gmx_simd_real_t rSimd_Bits6    = setSimdRealFrom1R( 1.53063836115600621e-018);
-#    else
-// Magic FP numbers corresponding to specific bit patterns
-const gmx_simd_real_t rSimd_Bits1    = setSimdRealFrom1R(-5.9654142337e+29);
-const gmx_simd_real_t rSimd_Bits2    = setSimdRealFrom1R(-1.0737417600e+08);
-const gmx_simd_real_t rSimd_Bits3    = setSimdRealFrom1R(-6.0235290527e+00);
-const gmx_simd_real_t rSimd_Bits4    = setSimdRealFrom1R( 1.0788832913e-31);
-const gmx_simd_real_t rSimd_Bits5    = setSimdRealFrom1R(-1.0508719529e+37);
-const gmx_simd_real_t rSimd_Bits6    = setSimdRealFrom1R( 1.1488970369e-02);
 #    endif
 #endif  // GMX_SIMD_HAVE_REAL
 #ifdef GMX_SIMD_HAVE_INT32
index a2d51c16f273daced0bfb44a29c04420f72682a0..79ff7783d87e0a6557ddccc062d0c82cf4355aec 100644 (file)
@@ -68,20 +68,6 @@ const gmx_simd4_real_t rSimd4_Exp      = setSimd4RealFrom3R( 1.40552351710274526
 const gmx_simd4_real_t  rSimd_ExpDouble = setSimd4RealFrom3R( 6.287393598732017379054414e+176,
                                                               8.794495252903116023030553e-140,
                                                               -3.637060701570496477655022e+202);
-// Magic FP numbers corresponding to specific bit patterns
-const gmx_simd4_real_t rSimd4_Bits1    = setSimd4RealFrom1R(-1.07730874267432137e+236);
-const gmx_simd4_real_t rSimd4_Bits2    = setSimd4RealFrom1R(-9.25596313493178307e+061);
-const gmx_simd4_real_t rSimd4_Bits3    = setSimd4RealFrom1R(-8.57750588235293981e+003);
-const gmx_simd4_real_t rSimd4_Bits4    = setSimd4RealFrom1R( 1.22416778341839096e-250);
-const gmx_simd4_real_t rSimd4_Bits5    = setSimd4RealFrom1R(-1.15711777004554095e+294);
-const gmx_simd4_real_t rSimd4_Bits6    = setSimd4RealFrom1R( 1.53063836115600621e-018);
-#    else
-const gmx_simd4_real_t rSimd4_Bits1    = setSimd4RealFrom1R(-5.9654142337e+29);
-const gmx_simd4_real_t rSimd4_Bits2    = setSimd4RealFrom1R(-1.0737417600e+08);
-const gmx_simd4_real_t rSimd4_Bits3    = setSimd4RealFrom1R(-6.0235290527e+00);
-const gmx_simd4_real_t rSimd4_Bits4    = setSimd4RealFrom1R( 1.0788832913e-31);
-const gmx_simd4_real_t rSimd4_Bits5    = setSimd4RealFrom1R(-1.0508719529e+37);
-const gmx_simd4_real_t rSimd4_Bits6    = setSimd4RealFrom1R( 1.1488970369e-02);
 #    endif
 
 ::std::vector<real>
index cde24e81af458c52e656e18a2b5f68a8f2b5c328..5d7a92251e589bd64cdb7da83d36c14fb51f45e1 100644 (file)
@@ -121,25 +121,47 @@ TEST_F(Simd4FloatingpointTest, gmxSimd4FnegR)
 }
 
 #ifdef GMX_SIMD4_HAVE_LOGICAL
+/* 1.3333282470703125 has mantissa 0101010101010101 (followed by zeros)
+ * 1.79998779296875   has mantissa 1100110011001100 (followed by zeros)
+ * 1.26666259765625   has mantissa 0100010001000100 (followed by zeros)
+ * 1.8666534423828125 has mantissa 1101110111011101 (followed by zeros)
+ *
+ * Since all of them have the same exponent (2^0), the exponent will
+ * not change with AND or OR operations.
+ */
 TEST_F(Simd4FloatingpointTest, gmxSimd4AndR)
 {
-    GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits3, gmx_simd4_and_r(rSimd4_Bits1, rSimd4_Bits2)); // Bits1 & Bits2 = Bits3
+    GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom1R(1.26666259765625),
+                             gmx_simd4_and_r(gmx_simd4_set1_r(1.3333282470703125),
+                                             gmx_simd4_set1_r(1.79998779296875)));
 }
 
-TEST_F(Simd4FloatingpointTest, gmxSimd4AndnotR)
+TEST_F(Simd4FloatingpointTest, gmxSimd4OrR)
 {
-    GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits4, gmx_simd4_andnot_r(rSimd4_Bits1, rSimd4_Bits2)); // (~Bits1) & Bits2 = Bits3
+    GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom1R(1.8666534423828125),
+                             gmx_simd4_or_r(gmx_simd4_set1_r(1.3333282470703125),
+                                            gmx_simd4_set1_r(1.79998779296875)));
 }
 
-TEST_F(Simd4FloatingpointTest, gmxSimd4OrR)
+TEST_F(Simd4FloatingpointTest, gmxSimd4XorR)
 {
-    GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits5, gmx_simd4_or_r(rSimd4_Bits1, rSimd4_Bits2)); // Bits1 | Bits2 = Bits3
+    /* Test xor by taking xor with a number and its negative. This should result
+     * in only the sign bit being set. We then use this bit to change the sign of
+     * different numbers.
+     */
+    gmx_simd4_real_t signbit = gmx_simd4_xor_r(gmx_simd4_set1_r(1.5), gmx_simd4_set1_r(-1.5));
+    GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom3R(-1, 2, -3), gmx_simd4_xor_r(signbit, setSimd4RealFrom3R(1, -2, 3)));
 }
 
-TEST_F(Simd4FloatingpointTest, gmxSimd4XorR)
+TEST_F(Simd4FloatingpointTest, gmxSimd4AndnotR)
 {
-    GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits6, gmx_simd4_xor_r(rSimd4_Bits1, rSimd4_Bits2)); // Bits1 ^ Bits2 = Bits3
+    /* Use xor (which we already tested, so fix that first if both tests fail)
+     * to extract the sign bit, and then use andnot to take absolute values.
+     */
+    gmx_simd4_real_t signbit = gmx_simd4_xor_r(gmx_simd4_set1_r(1.5), gmx_simd4_set1_r(-1.5));
+    GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom3R(1, 2, 3), gmx_simd4_andnot_r(signbit, setSimd4RealFrom3R(-1, 2, -3)));
 }
+
 #endif
 
 TEST_F(Simd4FloatingpointTest, gmxSimd4MaxR)
index 5be60b44b66894f873a410298206f2a56400cb14..78584b72c9b1f7cac495079122adb74cc7b0225e 100644 (file)
@@ -128,25 +128,47 @@ TEST_F(SimdFloatingpointTest, gmxSimdFnegR)
 }
 
 #ifdef GMX_SIMD_HAVE_LOGICAL
+/* 1.3333282470703125 has mantissa 0101010101010101 (followed by zeros)
+ * 1.79998779296875   has mantissa 1100110011001100 (followed by zeros)
+ * 1.26666259765625   has mantissa 0100010001000100 (followed by zeros)
+ * 1.8666534423828125 has mantissa 1101110111011101 (followed by zeros)
+ *
+ * Since all of them have the same exponent (2^0), the exponent will
+ * not change with AND or OR operations.
+ */
 TEST_F(SimdFloatingpointTest, gmxSimdAndR)
 {
-    GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits3, gmx_simd_and_r(rSimd_Bits1, rSimd_Bits2)); // Bits1 & Bits2 = Bits3
+    GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom1R(1.26666259765625),
+                            gmx_simd_and_r(gmx_simd_set1_r(1.3333282470703125),
+                                           gmx_simd_set1_r(1.79998779296875)));
 }
 
-TEST_F(SimdFloatingpointTest, gmxSimdAndnotR)
+TEST_F(SimdFloatingpointTest, gmxSimdOrR)
 {
-    GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits4, gmx_simd_andnot_r(rSimd_Bits1, rSimd_Bits2)); // (~Bits1) & Bits2 = Bits3
+    GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom1R(1.8666534423828125),
+                            gmx_simd_or_r(gmx_simd_set1_r(1.3333282470703125),
+                                          gmx_simd_set1_r(1.79998779296875)));
 }
 
-TEST_F(SimdFloatingpointTest, gmxSimdOrR)
+TEST_F(SimdFloatingpointTest, gmxSimdXorR)
 {
-    GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits5, gmx_simd_or_r(rSimd_Bits1, rSimd_Bits2)); // Bits1 | Bits2 = Bits3
+    /* Test xor by taking xor with a number and its negative. This should result
+     * in only the sign bit being set. We then use this bit to change the sign of
+     * different numbers.
+     */
+    gmx_simd_real_t signbit = gmx_simd_xor_r(gmx_simd_set1_r(1.5), gmx_simd_set1_r(-1.5));
+    GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-1, 2, -3), gmx_simd_xor_r(signbit, setSimdRealFrom3R(1, -2, 3)));
 }
 
-TEST_F(SimdFloatingpointTest, gmxSimdXorR)
+TEST_F(SimdFloatingpointTest, gmxSimdAndnotR)
 {
-    GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits6, gmx_simd_xor_r(rSimd_Bits1, rSimd_Bits2)); // Bits1 ^ Bits2 = Bits3
+    /* Use xor (which we already tested, so fix that first if both tests fail)
+     * to extract the sign bit, and then use andnot to take absolute values.
+     */
+    gmx_simd_real_t signbit = gmx_simd_xor_r(gmx_simd_set1_r(1.5), gmx_simd_set1_r(-1.5));
+    GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(1, 2, 3), gmx_simd_andnot_r(signbit, setSimdRealFrom3R(-1, 2, -3)));
 }
+
 #endif
 
 TEST_F(SimdFloatingpointTest, gmxSimdMaxR)
index 8c7bffbf542b77c827a515ccba1a9522cce6d95e..64ef078feb8833726b0492fb183612e2006d6526 100644 (file)
@@ -63,7 +63,7 @@ int gmx_gethostname(char *name, size_t len)
     {
         gmx_incons("gmx_gethostname called with len<8");
     }
-#if defined(HAVE_UNISTD_H) && !defined(__native_client__)
+#if defined(HAVE_UNISTD_H) && !defined(__native_client__) && !defined(__MINGW32__)
     if (gethostname(name, len-1) != 0)
     {
         std::strncpy(name, "unknown", 8);
index 0733dc377197a191a661f1101b150d0739f45707..0768c5086410cfe35b4caf93f71c319210519316 100644 (file)
@@ -192,20 +192,22 @@ void trim (char *str)
 char *
 gmx_ctime_r(const time_t *clock, char *buf, int n)
 {
-    char tmpbuf[STRLEN];
-
-#ifdef GMX_NATIVE_WINDOWS
+#ifdef _MSC_VER
     /* Windows */
-    ctime_s( tmpbuf, STRLEN, clock );
+    ctime_s( buf, n, clock );
+#elif defined(GMX_NATIVE_WINDOWS)
+    char *tmpbuf = ctime( clock );
+    strncpy(buf, tmpbuf, n-1);
+    buf[n-1] = '\0';
 #elif (defined(__sun))
     /*Solaris*/
-    ctime_r(clock, tmpbuf, n);
+    ctime_r(clock, buf, n);
 #else
+    char tmpbuf[STRLEN];
     ctime_r(clock, tmpbuf);
-#endif
     strncpy(buf, tmpbuf, n-1);
     buf[n-1] = '\0';
-
+#endif
     return buf;
 }
 
index e7d840f355dcdb6aacbc289e403182b943b0643f..af28843ef532da32ed1df43370144d54af280627 100644 (file)
@@ -58,6 +58,7 @@
 #endif
 
 #ifdef GMX_NATIVE_WINDOWS
+#include <windows.h>
 #include <direct.h>
 #include <io.h>
 #endif
@@ -234,7 +235,11 @@ gmx_off_t gmx_ftell(FILE *stream)
     return ftello(stream);
 #else
 #ifdef HAVE__FSEEKI64
+#ifndef __MINGW32__
     return _ftelli64(stream);
+#else
+    return ftello64(stream);
+#endif
 #else
     return ftell(stream);
 #endif
@@ -487,12 +492,12 @@ FILE *gmx_ffopen(const char *file, const char *mode)
 /* Our own implementation of dirent-like functionality to scan directories. */
 struct gmx_directory
 {
-#ifdef HAVE_DIRENT_H
-    DIR  *               dirent_handle;
-#elif (defined GMX_NATIVE_WINDOWS)
+#if defined(GMX_NATIVE_WINDOWS)
     intptr_t             windows_handle;
     struct _finddata_t   finddata;
     int                  first;
+#elif defined(HAVE_DIRENT_H)
+    DIR  *               dirent_handle;
 #else
     int                  dummy;
 #endif
@@ -509,19 +514,7 @@ gmx_directory_open(gmx_directory_t *p_gmxdir, const char *dirname)
 
     *p_gmxdir = gmxdir;
 
-#ifdef HAVE_DIRENT_H
-    if ( (gmxdir->dirent_handle = opendir(dirname)) != NULL)
-    {
-        rc = 0;
-    }
-    else
-    {
-        sfree(gmxdir);
-        *p_gmxdir = NULL;
-        rc        = EINVAL;
-    }
-#elif (defined GMX_NATIVE_WINDOWS)
-
+#if defined(GMX_NATIVE_WINDOWS)
     if (dirname != NULL && strlen(dirname) > 0)
     {
         char *     tmpname;
@@ -564,6 +557,17 @@ gmx_directory_open(gmx_directory_t *p_gmxdir, const char *dirname)
     {
         rc = EINVAL;
     }
+#elif defined(HAVE_DIRENT_H)
+    if ( (gmxdir->dirent_handle = opendir(dirname)) != NULL)
+    {
+        rc = 0;
+    }
+    else
+    {
+        sfree(gmxdir);
+        *p_gmxdir = NULL;
+        rc        = EINVAL;
+    }
 #else
     gmx_fatal(FARGS,
               "Source compiled without POSIX dirent or windows support - cannot scan directories.\n"
@@ -581,8 +585,41 @@ gmx_directory_nextfile(gmx_directory_t gmxdir, char *name, int maxlength_name)
 {
     int                     rc;
 
-#ifdef HAVE_DIRENT_H
+#if defined(GMX_NATIVE_WINDOWS)
+    if (gmxdir != NULL)
+    {
+        if (gmxdir->windows_handle <= 0)
+        {
 
+            name[0] = '\0';
+            rc      = ENOENT;
+        }
+        else if (gmxdir->first == 1)
+        {
+            strncpy(name, gmxdir->finddata.name, maxlength_name);
+            rc            = 0;
+            gmxdir->first = 0;
+        }
+        else
+        {
+            if (_findnext(gmxdir->windows_handle, &gmxdir->finddata) == 0)
+            {
+                strncpy(name, gmxdir->finddata.name, maxlength_name);
+                rc      = 0;
+            }
+            else
+            {
+                name[0] = '\0';
+                rc      = ENOENT;
+            }
+        }
+    }
+    else
+    {
+        name[0] = '\0';
+        rc      = EINVAL;
+    }
+#elif defined(HAVE_DIRENT_H)
     struct dirent *         direntp_large;
     struct dirent *         p;
 
@@ -613,38 +650,6 @@ gmx_directory_nextfile(gmx_directory_t gmxdir, char *name, int maxlength_name)
         name[0] = '\0';
         rc      = EINVAL;
     }
-
-#elif (defined GMX_NATIVE_WINDOWS)
-
-    if (gmxdir != NULL)
-    {
-        if (gmxdir->windows_handle <= 0)
-        {
-
-            name[0] = '\0';
-            rc      = ENOENT;
-        }
-        else if (gmxdir->first == 1)
-        {
-            strncpy(name, gmxdir->finddata.name, maxlength_name);
-            rc            = 0;
-            gmxdir->first = 0;
-        }
-        else
-        {
-            if (_findnext(gmxdir->windows_handle, &gmxdir->finddata) == 0)
-            {
-                strncpy(name, gmxdir->finddata.name, maxlength_name);
-                rc      = 0;
-            }
-            else
-            {
-                name[0] = '\0';
-                rc      = ENOENT;
-            }
-        }
-    }
-
 #else
     gmx_fatal(FARGS,
               "Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
@@ -658,10 +663,10 @@ int
 gmx_directory_close(gmx_directory_t gmxdir)
 {
     int                     rc;
-#ifdef HAVE_DIRENT_H
-    rc = (gmxdir != NULL) ? closedir(gmxdir->dirent_handle) : EINVAL;
-#elif (defined GMX_NATIVE_WINDOWS)
+#if defined(GMX_NATIVE_WINDOWS)
     rc = (gmxdir != NULL) ? _findclose(gmxdir->windows_handle) : EINVAL;
+#elif defined(HAVE_DIRENT_H)
+    rc = (gmxdir != NULL) ? closedir(gmxdir->dirent_handle) : EINVAL;
 #else
     gmx_fatal(FARGS,
               "Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
index 54a873e9feb098e56fcabe8ece1c869aead8eb67..ae67ee18519c711affefb2b4ceceafb8d2bcc3b5 100644 (file)
@@ -47,7 +47,6 @@
  * \ingroup module_utility
  */
 
-/* We currently don't support MingW. And ICC also defines it */
-#ifdef _MSC_VER
+#if defined( _WIN32 ) || defined( _WIN64 )
 #define GMX_NATIVE_WINDOWS
 #endif
index 292b1cd198543d38cd9fb4f49757186dfb23e024..f0e67a593f4af86bfbcfe62cf2d5f5c821aa0a52 100644 (file)
@@ -143,14 +143,14 @@ gmx_bool gmx_omp_check_thread_affinity(char **message);
  */
 static gmx_inline void gmx_pause()
 {
-#ifndef GMX_NATIVE_WINDOWS
+#ifndef _MSC_VER
     /* Ugly hack because the openmp implementation below hacks into the SIMD
      * settings to decide when to use _mm_pause(). This should eventually be
      * changed into proper detection of the intrinsics uses, not SIMD.
      */
-#if (defined GMX_SIMD_X86_SSE2) || (defined GMX_SIMD_X86_SSE4_1) || \
+#if ((defined GMX_SIMD_X86_SSE2) || (defined GMX_SIMD_X86_SSE4_1) || \
     (defined GMX_SIMD_X86_AVX_128_FMA) || (defined GMX_SIMD_X86_AVX_256) || \
-    (defined GMX_SIMD_X86_AVX2_256)
+    (defined GMX_SIMD_X86_AVX2_256)) && !defined(__MINGW32__)
     /* Replace with tbb::internal::atomic_backoff when/if we use TBB */
     _mm_pause();
 #elif defined __MIC__
index 6b45a99a82be08246483cfa8eb7035a6d6cfbf4a..e542c5532767e855591fca75a2f0f900a76453dc 100644 (file)
@@ -48,6 +48,9 @@
 #ifdef WITH_DMALLOC
 #include <dmalloc.h>
 #endif
+#ifdef HAVE__ALIGNED_MALLOC
+#include <malloc.h>
+#endif
 
 #include "thread_mpi/threads.h"
 
@@ -337,7 +340,7 @@ void *save_calloc_aligned(const char *name, const char *file, int line,
 }
 
 /* This routine can NOT be called with any pointer */
-void save_free_aligned(const char *name, const char *file, int line, void *ptr)
+void save_free_aligned(const char gmx_unused *name, const char gmx_unused *file, int gmx_unused line, void *ptr)
 {
     int   i, j;
     void *free = ptr;
index db626c4a3d4bdee72eaf0c092a52aaf3daba5558..5ea00c67d79dad5e19ee9e266d4ef481f38b6a87 100644 (file)
@@ -79,7 +79,7 @@ IntegrationTestFixture::IntegrationTestFixture()
     // TODO fix this when we have an encapsulation layer for handling
     // environment variables
 #ifdef GMX_NATIVE_WINDOWS
-    _putenv_s("GMX_MAXBACKUP", s_maxBackup.c_str());
+    _putenv(("GMX_MAXBACKUP="+s_maxBackup).c_str());
 #else
     setenv("GMX_MAXBACKUP", s_maxBackup.c_str(), true);
 #endif