endif()
set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL PROFILE)
+set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON)
set(CPACK_PACKAGE_NAME "gromacs")
set(CPACK_PACKAGE_VERSION_MAJOR ${GMX_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${GMX_VERSION_MINOR})
"Hostname of the machine where the cache was generated.")
endif()
+########################################################################
+# Detect architecture before setting options so we can alter defaults
+########################################################################
+# Detect the architecture the compiler is targeting, detect
+# SIMD instructions possibilities on that hardware, suggest SIMD instruction set
+# to use if none is specified, and populate the cache option for CPU
+# SIMD.
+include(gmxDetectTargetArchitecture)
+gmx_detect_target_architecture()
+
########################################################################
# User input options #
########################################################################
set(CMAKE_PREFIX_PATH "" CACHE STRING "Extra locations to search for external libraries and tools (give directory without lib, bin, or include)")
-option(GMX_DOUBLE "Use double precision (much slower, use only if you really need it)" OFF)
+if(GMX_TARGET_FUJITSU_SPARC64)
+ # Fujitsu only has SIMD in double precision, so this will be faster
+ set(GMX_DOUBLE_DEFAULT ON)
+else()
+ set(GMX_DOUBLE_DEFAULT OFF)
+endif()
+option(GMX_DOUBLE "Use double precision (much slower, use only if you really need it)" ${GMX_DOUBLE_DEFAULT})
+option(GMX_RELAXED_DOUBLE_PRECISION "Accept single precision 1/sqrt(x) when using Fujitsu HPC-ACE SIMD" OFF)
+mark_as_advanced(GMX_RELAXED_DOUBLE_PRECISION)
+
option(GMX_MPI "Build a parallel (message-passing) version of GROMACS" OFF)
option(GMX_THREAD_MPI "Build a thread-MPI-based multithreaded version of GROMACS (not compatible with MPI)" ON)
gmx_dependent_option(
set(REQUIRED_CUDA_COMPUTE_CAPABILITY 2.0)
include(gmxManageGPU)
-# Detect the architecture the compiler is targetting, detect
-# SIMD instructions possibilities on that hardware, suggest SIMD instruction set
-# to use if none is specified, and populate the cache option for CPU
-# SIMD.
-include(gmxDetectTargetArchitecture)
-gmx_detect_target_architecture()
-
if(GMX_CPU_ACCELERATION)
# Stay compatible with old Jenkins command line options for specific SIMD acceleration
set(GMX_SIMD "${GMX_CPU_ACCELERATION}" CACHE STRING "SIMD instruction set level and compiler optimization" FORCE)
if(GMX_DOUBLE)
add_definitions(-DGMX_DOUBLE)
list(APPEND INSTALLED_HEADER_DEFINITIONS "-DGMX_DOUBLE")
+ if(GMX_RELAXED_DOUBLE_PRECISION)
+ add_definitions(-DGMX_RELAXED_DOUBLE_PRECISION)
+ endif()
endif()
if(GMX_SOFTWARE_INVSQRT)
list(APPEND INSTALLED_HEADER_DEFINITIONS "-DGMX_SOFTWARE_INVSQRT")
endif()
if(WIN32 AND NOT CYGWIN)
- set(GMX_WSOCKLIB_PATH CACHE PATH "Path to winsock (wsock32.lib) library.")
- mark_as_advanced(GMX_WSOCKLIB_PATH)
- find_library(WSOCK32_LIBRARY NAMES wsock32 PATHS ${GMX_WSOCKLIB_PATH})
- if(WSOCK32_LIBRARY)
- list(APPEND GMX_EXTRA_LIBRARIES ${WSOCK32_LIBRARY})
- add_definitions(-DGMX_HAVE_WINSOCK)
- else()
- message(STATUS "No winsock found. Cannot use interactive molecular dynamics (IMD).")
- endif(WSOCK32_LIBRARY)
+ list(APPEND GMX_EXTRA_LIBRARIES "wsock32")
+ add_definitions(-DGMX_HAVE_WINSOCK)
endif()
include(gmxManageBlueGene)
endif()
+if(GMX_TARGET_FUJITSU_SPARC64)
+ include(gmxManageFujitsuSparc64)
+endif()
+
########################################################################
#Process MPI settings
########################################################################
# the research papers on the package. Check out http://www.gromacs.org.
# the name of the target operating system
-set(CMAKE_SYSTEM_NAME Linux CACHE STRING "Cross-compiling for Fujitsu Sparc64")
+set(CMAKE_SYSTEM_NAME Linux CACHE STRING "Cross-compiling for Fujitsu Sparc64, with MPI")
+set(CMAKE_SYSTEM_PROCESSOR "s64fx")
set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS FALSE)
# set the compiler
set(CMAKE_C_COMPILER mpifccpx)
set(CMAKE_CXX_COMPILER mpiFCCpx)
-set(CMAKE_C_COMPILER_ID "Fujitsu" CACHE STRING "Prevent CMake from adding GNU-specific linker flags (-rdynamic)" FORCE)
-set(CMAKE_C_FLAGS "-Kopenmp -Kfast,reduction,swp,simd=2,uxsimd -x500 -Xg -DGMX_RELAXED_DOUBLE_PRECISION -w" CACHE STRING "Fujitsu Sparc64 C Flags" FORCE)
-set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "Fujitsu Sparc64 C++ Flags" FORCE)
-set(GMX_SOFTWARE_INVSQRT OFF CACHE BOOL "Use native 1.0/sqrt(x) on Fujitsu Sparc64" FORCE)
+# Prevent CMake from adding GNU-specific linker flags (-rdynamic)
+set(CMAKE_C_COMPILER_ID "Fujitsu" CACHE STRING "Fujitsu C cross-compiler" FORCE)
+set(CMAKE_CXX_COMPILER_ID "Fujitsu" CACHE STRING "Fujitsu C++ cross-compiler" FORCE)
-set(GMX_THREAD_MPI OFF CACHE BOOL "Use real MPI instead" FORCE)
-set(GMX_MPI ON CACHE BOOL "Use MPI library" FORCE)
-set(GMX_DOUBLE ON CACHE BOOL "Use double by default on Fujitsu Sparc64 (due to HPC-ACE)" FORCE)
-set(GMX_GPU OFF CACHE BOOL "Cannot do GPU acceleration on Fujitsu Sparc64" FORCE)
-set(BUILD_SHARED_LIBS OFF CACHE BOOL "Use static linking by default on Fujitsu Sparc64" FORCE)
-
-set(GMX_SIMD "Sparc64_HPC_ACE" CACHE STRING "Enabling Sparc64 HPC-ACE SIMD when using Fujitsu Sparc64 toolchain")
+# FindOpenMP.cmake does not try -Kopenmp, but the package will try specific
+# flags based on the compiler ID.
+set(OMP_FLAG_Fujitsu "-Kopenmp")
--- /dev/null
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+# the name of the target operating system
+set(CMAKE_SYSTEM_NAME Linux CACHE STRING "Cross-compiling for Fujitsu Sparc64")
+# Set the identification to the same value we would get on the nodes (uname -m)
+set(CMAKE_SYSTEM_PROCESSOR "s64fx")
+
+set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS FALSE)
+
+# set the compiler
+set(CMAKE_C_COMPILER fccpx)
+set(CMAKE_CXX_COMPILER FCCpx)
+
+# Prevent CMake from adding GNU-specific linker flags (-rdynamic)
+# A patch has been submitted to make CMake itself handle this in the future
+set(CMAKE_C_COMPILER_ID "Fujitsu" CACHE STRING "Fujitsu C cross-compiler" FORCE)
+set(CMAKE_CXX_COMPILER_ID "Fujitsu" CACHE STRING "Fujitsu C++ cross-compiler" FORCE)
+
+# FindOpenMP.cmake does not try -Kopenmp, but the package will try specific
+# flags based on the compiler ID.
+set(OMP_FLAG_Fujitsu "-Kopenmp")
--- /dev/null
+int main()
+{
+#if defined (__FUJITSU) && ( defined(__sparc) || defined(__sparcv9) ) && ( defined(__LP64__) || defined(__arch64) )
+ return 0;
+#else
+#error This compiler is not targetting Fujitsu Sparc64
+#endif
+}
--- /dev/null
+#define _WIN32_WINNT 0x0601 /*Require Windows7 (needed for MingW)*/
+#include <windows.h>
+int main()
+{
+ PROCESSOR_NUMBER p;
+ return 0;
+}
ENDMACRO(TMPI_TEST_ATOMICS VARIABLE)
+try_compile(HAVE_PROCESSOR_NUMBER ${CMAKE_BINARY_DIR} "${CMAKE_SOURCE_DIR}/cmake/TestWinProcNum.c")
include(FindThreads)
-if (CMAKE_USE_PTHREADS_INIT)
+if (CMAKE_USE_WIN32_THREADS_INIT AND HAVE_PROCESSOR_NUMBER)
+ set(THREAD_WINDOWS 1)
+ set(THREAD_LIB)
+elseif (CMAKE_USE_PTHREADS_INIT)
check_include_files(pthread.h HAVE_PTHREAD_H)
set(THREAD_PTHREADS 1)
set(THREAD_LIB ${CMAKE_THREAD_LIBS_INIT})
-elseif (CMAKE_USE_WIN32_THREADS_INIT)
- set(THREAD_WINDOWS 1)
- set(THREAD_LIB)
else()
message(FATAL_ERROR "Thread support required")
endif ()
GMX_TEST_CXXFLAG(CXXFLAGS_WARN_EXTRA "-Wextra -Wno-missing-field-initializers -Wpointer-arith" GMXC_CXXFLAGS)
endif()
+ # Fujitsu compilers on PrimeHPC/Sparc64
+ if(${CMAKE_C_COMPILER_ID} MATCHES Fujitsu OR
+ (${CMAKE_C_COMPILER_ID} MATCHES unknown AND ${CMAKE_C_COMPILER} MATCHES ^fcc))
+ GMX_TEST_CFLAG(CFLAG_GNUCOMPAT "-Xg -w" GMXC_CFLAGS)
+ GMX_TEST_CFLAG(CFLAG_OPT "-Kfast,reduction,swp,simd=2,uxsimd,fsimple -x100" GMXC_CFLAGS)
+ endif()
+
+ if(${CMAKE_CXX_COMPILER_ID} MATCHES Fujitsu OR
+ (${CMAKE_CXX_COMPILER_ID} MATCHES unknown AND ${CMAKE_CXX_COMPILER} MATCHES ^FCC))
+ GMX_TEST_CXXFLAG(CXXFLAG_GNUCOMPAT "-Xg -w" GMXC_CXXFLAGS)
+ GMX_TEST_CXXFLAG(CXXFLAG_OPT "-Kfast,reduction,swp,simd=2,uxsimd,fsimple -x100" GMXC_CXXFLAGS)
+ endif()
+
# now actually set the flags:
if (NOT GMX_SKIP_DEFAULT_CFLAGS)
gmx_set_cmake_compiler_flags()
if(NOT DEFINED GMX_SIMD)
if(GMX_TARGET_BGQ)
set(${_suggested_simd} "IBM_QPX")
+ elseif(GMX_TARGET_FUJITSU_SPARC64)
+ # HPC-ACE is always present. In the future we
+ # should add detection for HPC-ACE2 here.
+ set(${_suggested_simd} "Sparc64_HPC_ACE")
elseif(GMX_TARGET_X86)
gmx_suggest_x86_simd(${_suggested_simd})
else()
try_compile(GMX_TARGET_MIC ${CMAKE_BINARY_DIR}
"${CMAKE_SOURCE_DIR}/cmake/TestMIC.c")
endif()
+ if (NOT DEFINED GMX_TARGET_FUJITSU_SPARC64)
+ try_compile(GMX_TARGET_FUJITSU_SPARC64 ${CMAKE_BINARY_DIR}
+ "${CMAKE_SOURCE_DIR}/cmake/TestFujitsuSparc64.c")
+ endif()
endfunction()
# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
-# the name of the target operating system
-set(CMAKE_SYSTEM_NAME Linux CACHE STRING "Cross-compiling for Fujitsu Sparc64")
+# Managing configuration for Fujitsu PrimeHPC Sparc64
+# For now this is mainly used for K computer.
+message(STATUS "Configuring for Fujitsu Sparc64")
-set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS FALSE)
-
-# set the compiler
-set(CMAKE_C_COMPILER fccpx)
-set(CMAKE_CXX_COMPILER FCCpx)
-set(CMAKE_C_COMPILER_ID "Fujitsu" CACHE STRING "Prevent CMake from adding GNU-specific linker flags (-rdynamic)" FORCE)
+set(BUILD_SHARED_LIBS OFF CACHE BOOL "Use static linking by default on Fujitsu Sparc64" FORCE)
+set(GMX_GPU OFF CACHE BOOL "Cannot do GPU acceleration on Fujitsu Sparc64" FORCE)
-set(CMAKE_C_FLAGS "-Kopenmp -Kfast,reduction,swp,simd=2,uxsimd -x500 -Xg -DGMX_RELAXED_DOUBLE_PRECISION -w" CACHE STRING "Fujitsu Sparc64 C Flags" FORCE)
-set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "Fujitsu Sparc64 C++ Flags" FORCE)
set(GMX_SOFTWARE_INVSQRT OFF CACHE BOOL "Use native 1.0/sqrt(x) on Fujitsu Sparc64" FORCE)
+set(GMX_X11 OFF CACHE BOOL "X11 not compatible with Fujitsu Sparc64 cross-compile, disabled." FORCE)
-# By default CMake will use thread-mpi
-set(GMX_DOUBLE ON CACHE BOOL "Use double by default on Fujitsu Sparc64 (due to HPC-ACE)" FORCE)
-set(GMX_GPU OFF CACHE BOOL "Cannot do GPU acceleration on Fujitsu Sparc64" FORCE)
-set(BUILD_SHARED_LIBS OFF CACHE BOOL "Use static linking by default on Fujitsu Sparc64" FORCE)
-
-set(GMX_SIMD "Sparc64_HPC_ACE" CACHE STRING "Enabling Sparc64 HPC-ACE SIMD when using Fujitsu Sparc64 toolchain")
if(OPENMP_FOUND)
# CMake on Windows doesn't support linker flags passed to target_link_libraries
# (i.e. it treats /openmp as \openmp library file). Also, no OpenMP linker flags are needed.
- if(NOT (WIN32 AND NOT CYGWIN))
+ if(NOT (WIN32 AND NOT CYGWIN AND NOT MINGW))
if(CMAKE_COMPILER_IS_GNUCC AND GMX_PREFER_STATIC_OPENMP AND NOT APPLE)
set(OpenMP_LINKER_FLAGS "-Wl,-static -lgomp -lrt -Wl,-Bdynamic -lpthread")
set(OpenMP_SHARED_LINKER_FLAGS "")
endif()
endif()
endif()
+ if(MINGW)
+ #GCC Bug 48659
+ set(OpenMP_C_FLAGS "${OpenMP_C_FLAGS} -mstackrealign")
+ endif()
else()
message(WARNING
"The compiler you are using does not support OpenMP parallelism. This might hurt your performance a lot, in particular with GPUs. Try using a more recent version, or a different compiler. For now, we are proceeding by turning off OpenMP.")
# VARIABLE will be set to true if libxml2 support is present
include(CheckLibraryExists)
+include(CheckIncludeFiles)
include(gmxOptionUtilities)
function(GMX_TEST_LIBXML2 VARIABLE)
if(LIBXML2_FOUND)
unset(LIBXML2_LINKS_OK CACHE)
endif()
check_library_exists("${LIBXML2_LIBRARIES}" "xmlTextWriterEndAttribute" "" LIBXML2_LINKS_OK)
- set(${VARIABLE} ${LIBXML2_LINKS_OK} PARENT_SCOPE)
+ if(LIBXML2_LINKS_OK)
+ #check that xml headers can be included
+ set(CMAKE_REQUIRED_INCLUDES "${LIBXML2_INCLUDE_DIR}")
+ check_include_files("libxml/parser.h" LIBXML2_INCL_OK)
+ if(NOT LIBXML2_INCL_OK)
+ #xml headers depend on iconv.h. Test whether adding its path fixes the problem
+ find_path(ICONV_INCLUDE_DIR iconv.h)
+ if(ICONV_INCLUDE_DIR)
+ set(CMAKE_REQUIRED_INCLUDES "${LIBXML2_INCLUDE_DIR};${ICONV_INCLUDE_DIR}")
+ unset(LIBXML2_INCL_OK CACHE)
+ check_include_files("libxml/parser.h" LIBXML2_INCL_OK)
+ set(LIBXML2_INCLUDE_DIR "${LIBXML2_INCLUDE_DIR};${ICONV_INCLUDE_DIR}" CACHE PATH "Libxml2 include path" FORCE)
+ endif()
+ endif()
+ set(${VARIABLE} ${LIBXML2_INCL_OK} PARENT_SCOPE)
+ else()
+ set(${VARIABLE} OFF PARENT_SCOPE)
+ endif()
else()
set(${VARIABLE} OFF PARENT_SCOPE)
endif()
elseif(${GMX_SIMD} STREQUAL "SPARC64_HPC_ACE")
+ # Note that GMX_RELAXED_DOUBLE_PRECISION is enabled by default in the top-level CMakeLists.txt
+
set(GMX_SIMD_SPARC64_HPC_ACE 1)
set(SIMD_STATUS_MESSAGE "Enabling Sparc64 HPC-ACE SIMD instructions")
cmake .. -DCMAKE_C_COMPILER=mpicc \
-DCMAKE_CXX_COMPILER=mpicxx \
- -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-CXX \
+ -DCMAKE_TOOLCHAIN_FILE=Platform/BlueGeneQ-static-XL-CXX.cmake \
-DCMAKE_PREFIX_PATH=/your/fftw/installation/prefix \
-DGMX_MPI=ON \
-DGMX_BUILD_MDRUN_ONLY=ON
This is the architecture of the K computer, which uses Fujitsu
`Sparc64VIIIfx` chips. On this platform, GROMACS @PROJECT_VERSION@ has
-accelerated group kernels, no accelerated Verlet kernels, and a custom
-build toolchain.
+accelerated group kernels using the HPC-ACE instructions, no
+accelerated Verlet kernels, and a custom build toolchain. Since this
+particular chip only does double precision SIMD, the default setup
+is to build GROMACS in double. Since most users only need single, we have added
+an option GMX_RELAXED_DOUBLE_PRECISION to accept single precision square root
+accuracy in the group kernels; unless you know that you really need 15 digits
+of accuracy in each individual force, we strongly recommend you use this. Note
+that all summation and other operations are still done in double.
+
+The recommended configuration is to use
+
+ cmake .. -DCMAKE_TOOLCHAIN_FILE=Toolchain-Fujitsu-Sparc64-mpi.cmake \
+ -DCMAKE_PREFIX_PATH=/your/fftw/installation/prefix \
+ -DCMAKE_INSTALL_PREFIX=/where/gromacs/should/be/installed \
+ -DGMX_MPI=ON \
+ -DGMX_BUILD_MDRUN_ONLY=ON \
+ -DGMX_RELAXED_DOUBLE_PRECISION=ON
+ make
+ make install
### Intel Xeon Phi ###
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2012,2013, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
#
macro(get_compiler_info LANGUAGE BUILD_COMPILER BUILD_FLAGS)
set(${BUILD_COMPILER} "${CMAKE_${LANGUAGE}_COMPILER} ${CMAKE_${LANGUAGE}_COMPILER_ID} ${CMAKE_${LANGUAGE}_COMPILER_VERSION}")
- string(TOUPPER ${CMAKE_BUILD_TYPE} _build_type)
+ string(TOUPPER "${CMAKE_BUILD_TYPE}" _build_type)
set(${BUILD_FLAGS} "${CMAKE_${LANGUAGE}_FLAGS} ${CMAKE_${LANGUAGE}_FLAGS_${_build_type}}")
endmacro()
*
* \inlibraryapi
*/
+#ifndef GMX_CONFIG_H
+#define GMX_CONFIG_H
#include "gromacs/utility/gmx_header_config.h"
/* TODO: For now, disable Doxygen warnings from here */
/* Define if we have zlib */
#cmakedefine HAVE_ZLIB
+#endif
+
/*! \endcond */
include_directories(BEFORE ${GTEST_DIR})
include_directories(BEFORE ${GMOCK_INCLUDE_DIRS})
include_directories(BEFORE ${GMOCK_DIR})
+
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag(-Wno-unused-variable HAS_NO_UNUSED_VARIABLE)
+if (HAS_NO_UNUSED_VARIABLE)
+ set_source_files_properties(${GTEST_SOURCES} PROPERTIES COMPILE_FLAGS "-Wno-unused-variable")
+endif()
+
add_library(gmock STATIC ${UNITTEST_TARGET_OPTIONS} ${GMOCK_SOURCES} ${GTEST_SOURCES})
set_property(TARGET gmock APPEND PROPERTY COMPILE_DEFINITIONS "${GMOCK_COMPILE_DEFINITIONS}")
if (errorcode != TMPI_ERR_IO)
{
-#if !(defined( _WIN32 ) || defined( _WIN64 ) )
+#ifndef _MSC_VER
strncpy(strn, tmpi_errmsg[errorcode], TMPI_MAX_ERROR_STRING);
#else
strncpy_s(strn, TMPI_MAX_ERROR_STRING, tmpi_errmsg[errorcode],
}
else
{
-#if !(defined( _WIN32 ) || defined( _WIN64 ) )
+#ifndef _MSC_VER
snprintf(strn, TMPI_MAX_ERROR_STRING,
"%s: %s", tmpi_errmsg[errorcode], strerror(errno));
#else
#include <unistd.h>
#endif
+#if defined( _WIN32 ) || defined( _WIN64 )
+#include <windows.h>
+#endif
+
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include "config.h"
#endif
+#ifdef THREAD_WINDOWS
+ #ifdef __MINGW32__
+ #define _WIN32_WINNT 0x0601 /* Windows 7*/
+ #endif
+#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#include <dmalloc.h>
#endif
-
-#if !(defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) || defined (__CYGWIN__) || defined (__CYGWIN32__)
-
+#ifndef THREAD_WINDOWS
/* We don't have specific NUMA aware allocators: */
Scott Field (sfield@microsoft.com) Jan-2011
*/
-//#define _WIN32_WINNT 0x0601
#include <windows.h>
-
-
/*
__declspec(align()) may not be supported by all compilers, so define the
size of the structure manually to force alignment
return;
}
+#if defined(WIN64) || defined( _WIN64 )
+ hPriorValue = (HANDLE *)InterlockedCompareExchange64(
+ (LONGLONG volatile *)&g_hHeap,
+ (LONGLONG) hHeapNew,
+ 0
+ );
+#else
hPriorValue = (HANDLE *)InterlockedCompareExchange(
(LONG volatile *)&g_hHeap,
(LONG) hHeapNew,
0
);
+#endif
if (hPriorValue != NULL)
{
};
/* the thread_starter function that sets the thread id */
+#ifdef __MINGW32__
+__attribute__((force_align_arg_pointer))
+#endif
static void *tMPI_Thread_starter(void *arg)
{
struct tMPI_Thread_starter *starter = (struct tMPI_Thread_starter *)arg;
digits = 1;
}
}
-#if !(defined( _WIN32 ) || defined( _WIN64 ) )
+#ifndef _MSC_VER
strcpy(name, "thread #");
#else
strncpy_s(name, TMPI_MAX_PROCESSOR_NAME, "thread #", TMPI_MAX_PROCESSOR_NAME);
#ifdef THREAD_WINDOWS
/* the win32 header */
+#ifdef __MINGW32__
+/* A couple of types (e.g. PROCESSOR_NUMBER) are only available since
+ * WinServer2008 (0x600) and Windows7 (0x601). MinGW doesn't have
+ * them defined for 0x600 in the headers */
+#define _WIN32_WINNT 0x0601
+#endif
#include <windows.h>
#include "thread_mpi/atomic.h"
#include "thread_mpi/threads.h"
#include "impl.h"
+#include "unused.h"
#include "winthreads.h"
struct tMPI_Thread *thread;
};
+#ifdef __GNUC__
+__attribute__((force_align_arg_pointer))
+#endif
static DWORD WINAPI tMPI_Win32_thread_starter( LPVOID lpParam )
{
struct tMPI_Thread_starter_param *prm =
}
-void tMPI_Thread_exit(void *value_ptr)
+void tMPI_Thread_exit(void tmpi_unused *value_ptr)
{
/* TODO: call destructors for thread-local storage */
ExitThread( 0 );
-int tMPI_Thread_key_create(tMPI_Thread_key_t *key, void (*destructor)(void *))
+int tMPI_Thread_key_create(tMPI_Thread_key_t *key, void (*destructor)(void *) tmpi_unused)
{
if (key == NULL)
{
#endif /* USE_STD_INTTYPES_H */
-
#ifndef USE_WINDOWS
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#define USE_WINDOWS
#endif /* win32... */
#endif /* not defined USE_WINDOWS */
+#ifdef USE_WINDOWS
+#define TNG_PRIsize "Iu"
+#else
+#define TNG_PRIsize "zu"
+#endif
+
#ifndef DECLSPECDLLEXPORT
#ifdef USE_WINDOWS
#define DECLSPECDLLEXPORT __declspec(dllexport)
#include "compression/tng_compress.h"
#include "tng/version.h"
-#ifdef _MSC_VER
-#define fseeko _fseeki64
-#define ftello _ftelli64
+#if defined( _WIN32 ) || defined( _WIN64 )
+ #ifndef fseeko
+ #define fseeko _fseeki64
+ #endif
+ #ifndef ftello
+ #ifdef __MINGW32__
+ #define ftello ftello64
+ #else
+ #define ftello _ftelli64
+ #endif
+ #endif
#endif
struct tng_bond {
*block_p = malloc(sizeof(struct tng_gen_block));
if(!*block_p)
{
- fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+ fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
sizeof(struct tng_gen_block), __FILE__, __LINE__);
return(TNG_CRITICAL);
}
frame_set->n_particle_data_blocks);
if(!data)
{
- fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+ fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
sizeof(struct tng_particle_data) *
frame_set->n_particle_data_blocks,
__FILE__, __LINE__);
tng_data->n_particle_data_blocks);
if(!data)
{
- fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+ fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
sizeof(struct tng_particle_data) *
tng_data->n_particle_data_blocks,
__FILE__, __LINE__);
temp_name = realloc(block->name, len);
if(!temp_name)
{
- fprintf(stderr, "TNG library: Cannot allocate memory (%lud bytes). %s: %d\n", len,
+ fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n", len,
__FILE__, __LINE__);
free(block->name);
block->name = 0;
frame_set->n_data_blocks);
if(!data)
{
- fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+ fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
sizeof(struct tng_non_particle_data) * frame_set->n_data_blocks,
__FILE__, __LINE__);
free(frame_set->tr_data);
tng_data->n_data_blocks);
if(!data)
{
- fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+ fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
sizeof(struct tng_non_particle_data) * tng_data->n_data_blocks,
__FILE__, __LINE__);
free(tng_data->non_tr_data);
*molecule_p = malloc(sizeof(struct tng_molecule));
if(!*molecule_p)
{
- fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+ fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
sizeof(struct tng_molecule), __FILE__, __LINE__);
return(TNG_CRITICAL);
}
*tng_data_p = malloc(sizeof(struct tng_trajectory));
if(!*tng_data_p)
{
- fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+ fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
sizeof(struct tng_trajectory), __FILE__, __LINE__);
return(TNG_CRITICAL);
}
*dest_p = malloc(sizeof(struct tng_trajectory));
if(!*dest_p)
{
- fprintf(stderr, "TNG library: Cannot allocate memory (%lu bytes). %s: %d\n",
+ fprintf(stderr, "TNG library: Cannot allocate memory (%"TNG_PRIsize" bytes). %s: %d\n",
sizeof(struct tng_trajectory), __FILE__, __LINE__);
return(TNG_CRITICAL);
}
return errorString;
}
-#elif defined(_MSC_VER)
+#elif defined( _WIN32 ) || defined( _WIN64 )
#include <windows.h>
static CHAR szBuf[80];
DWORD dw = GetLastError();
- sprintf(szBuf, "vmddlopen failed: GetLastError returned %u\n", dw);
+ sprintf(szBuf, "vmddlopen failed: GetLastError returned %lu\n", dw);
return szBuf;
}
fprintf(stderr, "Will write debug log file: %s\n", filename.c_str());
gmx_init_debug(optionsHolder.debugLevel(), filename.c_str());
}
-#if defined(HAVE_UNISTD_H) && !defined(GMX_NO_NICE)
+#if defined(HAVE_UNISTD_H) && !defined(GMX_NO_NICE) && !defined(__MINGW32__)
// Set the nice level unless disabled in the configuration.
if (optionsHolder.niceLevel() != 0)
{
// tng_last_program_name_set(*tng, programInfo);
// }
-#ifdef HAVE_UNISTD_H
+#if defined(HAVE_UNISTD_H) && !defined(__MINGW32__)
char username[256];
if (!getlogin_r(username, 256))
{
#ifndef GMX_NATIVE_WINDOWS
#include <glob.h>
#else
+#ifndef _WIN32_IE
+#define _WIN32_IE 0x0500 /* SHGetFolderPath is available since WinXP/IE5 */
+#endif
#include <windows.h>
#include <shlobj.h>
#endif
return -1;
}
+#ifdef _MSC_VER
return _chsize_s( fileno(fp), size);
+#else
+ return _chsize( fileno(fp), size);
+#endif
#endif
}
#endif
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
-#ifdef _MSC_VER
+#ifdef GMX_NATIVE_WINDOWS
/* MSVC definition for __cpuid() */
-#include <intrin.h>
+ #ifdef _MSC_VER
+ #include <intrin.h>
+ #endif
/* sysinfo functions */
-#include <windows.h>
+ #include <windows.h>
#endif
#ifdef HAVE_UNISTD_H
/* sysconf() definition */
-#include <unistd.h>
+ #include <unistd.h>
#endif
#include "gromacs/legacyheaders/gmx_cpuid.h"
#include "gmxpre.h"
#include "config.h"
-#if defined(HAVE_SCHED_H)
+#ifdef HAVE_SCHED_AFFINITY
# ifndef _GNU_SOURCE
# define _GNU_SOURCE 1
# endif
#define NSTATES 2
int i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
real shX, shY, shZ;
- real Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
- real Vcoul[NSTATES], Vvdw[NSTATES];
+ real tx, ty, tz, Fscal;
+ double FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */
+ double Vcoul[NSTATES], Vvdw[NSTATES]; /* Needs double for sc_power==48 */
real rinv6, r, rt, rtC, rtV;
real iqA, iqB;
real qq[NSTATES], vctot, krsq;
double dvdl_coul, dvdl_vdw;
real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
- real rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
+ double rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
real sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
int do_tab, tab_elemsize;
int n0, n1C, n1V, nnn;
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef _kernelutil_sparc64_hpc_ace_double_h_
#define _kernelutil_sparc64_hpc_ace_double_h_
+/* Get gmx_simd_exp_d() */
+#include "gromacs/simd/simd.h"
+#include "gromacs/simd/simd_math.h"
+
/* Fujitsu header borrows the name from SSE2, since some instructions have aliases */
#include <emmintrin.h>
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
**************************/
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
**************************/
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* COULOMB ELECTROSTATICS */
velec = _fjsp_mul_v2r8(qq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* COULOMB ELECTROSTATICS */
velec = _fjsp_mul_v2r8(qq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* COULOMB ELECTROSTATICS */
velec = _fjsp_mul_v2r8(qq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* COULOMB ELECTROSTATICS */
velec = _fjsp_mul_v2r8(qq00,rinv00);
**************************/
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
**************************/
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* EWALD ELECTROSTATICS */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* EWALD ELECTROSTATICS */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* EWALD ELECTROSTATICS */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* EWALD ELECTROSTATICS */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* Analytical LJ-PME */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* Analytical LJ-PME */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
{
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
{
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* EWALD ELECTROSTATICS */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* EWALD ELECTROSTATICS */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* EWALD ELECTROSTATICS */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* EWALD ELECTROSTATICS */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* Analytical LJ-PME */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* Analytical LJ-PME */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* EWALD ELECTROSTATICS */
**************************/
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
**************************/
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
isaprod = _fjsp_mul_v2r8(isai0,isaj0);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
isaprod = _fjsp_mul_v2r8(isai0,isaj0);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* Analytical LJ-PME */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
_fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* Analytical LJ-PME */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* Analytical LJ-PME */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
/* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
- c6grid_00 = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+ c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+ vdwgridparam+vdwioffset0+vdwjidx0B);
/* Analytical LJ-PME */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
{
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
{
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
**************************/
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
**************************/
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
{
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
{
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
r00 = _fjsp_mul_v2r8(rsq00,rinv00);
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* Calculate table index by multiplying r with table scale and truncate to integer */
rt = _fjsp_mul_v2r8(r00,vftabscale);
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
/* Compute parameters for interactions between i and j atoms */
qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* REACTION-FIELD ELECTROSTATICS */
felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
**************************/
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
**************************/
/* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+ gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+ vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
/* LENNARD-JONES DISPERSION/REPULSION */
/* #define INNERFLOPS INNERFLOPS+1 */
/* #endif */
/* #if 'vdw' in INTERACTION_FLAGS[I][J] */
- /* #if ROUND == 'Loop' */
gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset{I}+vdwjidx{J}A,
vdwparam+vdwioffset{I}+vdwjidx{J}B,&c6_{I}{J},&c12_{I}{J});
- /* #if 'LJEwald' in KERNEL_VDW */
+ /* #if 'LJEwald' in KERNEL_VDW */
c6grid_{I}{J} = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset{I}+vdwjidx{J}A,
vdwgridparam+vdwioffset{I}+vdwjidx{J}B);
- /* #endif */
- /* #else */
- gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset{I}+vdwjidx{J}A,&c6_{I}{J},&c12_{I}{J});
-
- /* #if 'LJEwald' in KERNEL_VDW */
- c6grid_{I}{J} = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset{I}+vdwjidx{J}A);
- /* #endif */
/* #endif */
/* #endif */
/* #endif */
rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq{I}{J},rinvsq{I}{J}),rinvsq{I}{J});
ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq{I}{J});
ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(-ewcljrsq);
+ exponent = gmx_simd_exp_d(ewcljrsq);
/* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
/* #define INNERFLOPS INNERFLOPS+9 */
/* #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch' */
/* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_{I}{J},_fjsp_sub_v2r8(one,poly),c6_{I}{J}),rinvsix);
+ vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_{I}{J},_fjsp_sub_v2r8(poly,one),c6_{I}{J}),rinvsix);
vvdw12 = _fjsp_mul_v2r8(c12_{I}{J},_fjsp_mul_v2r8(rinvsix,rinvsix));
/* #define INNERFLOPS INNERFLOPS+5 */
/* #if KERNEL_MOD_VDW=='PotentialShift' */
/* #endif */
/* #elif KERNEL_VF=='Force' */
/* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_{I}{J},_fjsp_msub_v2r8(one,poly));
+ f6A = _fjsp_mul_v2r8(c6grid_{I}{J},_fjsp_sub_v2r8(one,poly));
/* f6B = C6grid * exponent * beta^6 */
f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_{I}{J},one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
/* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
sprintf(err_buf, "tau-p must be > 0 instead of %g\n", ir->tau_p);
CHECK(ir->tau_p <= 0);
- if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc))
+ if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc) - 10*GMX_REAL_EPS)
{
sprintf(warn_buf, "For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)",
EPCOUPLTYPE(ir->epc), ir->tau_p, pcouple_min_integration_steps(ir->epc), dt_pcoupl);
nstcmin = tcouple_min_integration_steps(ir->etc);
if (nstcmin > 1)
{
- if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
+ if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin - 10*GMX_REAL_EPS)
{
sprintf(warn_buf, "For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
ETCOUPLTYPE(ir->etc),
#include "config.h"
+#include <assert.h>
#include <math.h>
#include <string.h>
{
return;
}
- if ((pr->nr == 0) && (pr->param != NULL))
- {
- fprintf(stderr, "Warning: dangling pointer at %lx\n",
- (unsigned long)pr->param);
- pr->param = NULL;
- }
+ assert(!((pr->nr == 0) && (pr->param != NULL)));
if (pr->nr+extra > pr->maxnr)
{
pr->maxnr = max(1.2*pr->maxnr, pr->maxnr + extra);
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef GMX_SIMD_IMPL_SPARC64_HPC_ACE_H
+#define GMX_SIMD_IMPL_SPARC64_HPC_ACE_H
+
+#include <math.h>
+/* Fujitsu header borrows the name from SSE2, since some instructions have aliases */
+#include <emmintrin.h>
+
+
+/* Sparc64 HPC-ACE SIMD instruction wrappers
+ *
+ * Please see documentation in gromacs/simd/simd.h for defines.
+ */
+
+/* Capability definitions for Sparc64 HPC-ACE */
+/* HPC-ACE is actually double-only on the register level, but we also implement
+ * a single-precision interface where we only offer single-precision accuracy
+ * in math functions - this can save quite a few cycles.
+ */
+#define GMX_SIMD_HAVE_FLOAT
+#define GMX_SIMD_HAVE_DOUBLE
+#define GMX_SIMD_HAVE_HARDWARE
+#undef GMX_SIMD_HAVE_LOADU
+#undef GMX_SIMD_HAVE_STOREU
+#define GMX_SIMD_HAVE_LOGICAL
+#define GMX_SIMD_HAVE_FMA
+#undef GMX_SIMD_HAVE_FRACTION
+#define GMX_SIMD_HAVE_FINT32
+#define GMX_SIMD_HAVE_FINT32_EXTRACT
+#define GMX_SIMD_HAVE_FINT32_LOGICAL
+#undef GMX_SIMD_HAVE_FINT32_ARITHMETICS
+#define GMX_SIMD_HAVE_DINT32
+#define GMX_SIMD_HAVE_DINT32_EXTRACT
+#define GMX_SIMD_HAVE_DINT32_LOGICAL
+#undef GMX_SIMD_HAVE_DINT32_ARITHMETICS
+#undef GMX_SIMD4_HAVE_FLOAT
+#undef GMX_SIMD4_HAVE_DOUBLE
+
+/* Implementation details */
+#define GMX_SIMD_FLOAT_WIDTH 2
+#define GMX_SIMD_DOUBLE_WIDTH 2
+#define GMX_SIMD_FINT32_WIDTH 2
+#define GMX_SIMD_DINT32_WIDTH 2
+#define GMX_SIMD_RSQRT_BITS 10
+#define GMX_SIMD_RCP_BITS 9
+
+/* HPC-ACE is a bit strange; some instructions like
+ * shifts only work on _integer_ versions of SIMD
+ * registers, but there are no intrinsics to load
+ * or convert, or even to cast. The only way to use
+ * them is to declare unions with the SIMD integer
+ * type. However, this will lead to extra load ops,
+ * and the normal real-to-int and int-to-real
+ * conversions work purely on the v2r8 fp regs.
+ * Since our most common usage is to convert and
+ * then extract the result for table lookups, we
+ * define the gmx_simd_fint32_t datatype to use
+ * the v2r8 rather than v2i8 SIMD type.
+ */
+
+/****************************************************
+ * SINGLE PRECISION SIMD IMPLEMENTATION *
+ ****************************************************/
+#define gmx_simd_float_t _fjsp_v2r8
+#define gmx_simd_load_f gmx_simd_load_f_sparc64_hpc_ace
+#define gmx_simd_load1_f(m) _fjsp_set_v2r8((*m), (*m))
+#define gmx_simd_set1_f(a) _fjsp_set_v2r8(a, a)
+#define gmx_simd_store_f gmx_simd_store_f_sparc64_hpc_ace
+#define gmx_simd_loadu_f gmx_simd_load_f
+/* No unaligned store of gmx_simd_float_t */
+#define gmx_simd_setzero_f _fjsp_setzero_v2r8
+#define gmx_simd_add_f _fjsp_add_v2r8
+#define gmx_simd_sub_f _fjsp_sub_v2r8
+#define gmx_simd_mul_f _fjsp_mul_v2r8
+#define gmx_simd_fmadd_f(a, b, c) _fjsp_madd_v2r8(a, b, c)
+#define gmx_simd_fmsub_f(a, b, c) _fjsp_msub_v2r8(a, b, c)
+#define gmx_simd_fnmadd_f(a, b, c) _fjsp_nmsub_v2r8(a, b, c)
+#define gmx_simd_fnmsub_f(a, b, c) _fjsp_nmadd_v2r8(a, b, c)
+#define gmx_simd_and_f _fjsp_and_v2r8
+#define gmx_simd_andnot_f _fjsp_andnot1_v2r8
+#define gmx_simd_or_f _fjsp_or_v2r8
+#define gmx_simd_xor_f _fjsp_xor_v2r8
+#define gmx_simd_rsqrt_f _fjsp_rsqrta_v2r8
+#define gmx_simd_rcp_f _fjsp_rcpa_v2r8
+#define gmx_simd_fabs_f(x) _fjsp_abs_v2r8(x)
+#define gmx_simd_fneg_f(x) _fjsp_neg_v2r8(x)
+#define gmx_simd_max_f _fjsp_max_v2r8
+#define gmx_simd_min_f _fjsp_min_v2r8
+#define gmx_simd_round_f(x) gmx_simd_round_d(x)
+#define gmx_simd_trunc_f(x) gmx_simd_trunc_d(x)
+#define gmx_simd_fraction_f(x) gmx_simd_sub_f(x, gmx_simd_trunc_f(x))
+#define gmx_simd_get_exponent_f gmx_simd_get_exponent_d_sparc64_hpc_ace
+#define gmx_simd_get_mantissa_f gmx_simd_get_mantissa_d_sparc64_hpc_ace
+#define gmx_simd_set_exponent_f gmx_simd_set_exponent_d_sparc64_hpc_ace
+/* integer datatype corresponding to float: gmx_simd_fint32_t */
+#define gmx_simd_fint32_t _fjsp_v2r8
+#define gmx_simd_load_fi(m) gmx_simd_load_di_sparc64_hpc_ace(m)
+#define gmx_simd_set1_fi(i) gmx_simd_set1_di_sparc64_hpc_ace(i)
+#define gmx_simd_store_fi(m, x) gmx_simd_store_di_sparc64_hpc_ace(m, x)
+#define gmx_simd_loadu_fi gmx_simd_load_fi
+/* No unaligned store of gmx_simd_fint32_t */
+#define gmx_simd_setzero_fi _fjsp_setzero_v2r8
+#define gmx_simd_cvt_f2i gmx_simd_cvt_d2i
+#define gmx_simd_cvtt_f2i _fjsp_dtox_v2r8
+#define gmx_simd_cvt_i2f _fjsp_xtod_v2r8
+#define gmx_simd_extract_fi gmx_simd_extract_di_sparc64_hpc_ace
+/* Integer logical ops on gmx_simd_fint32_t */
+/* Shifts are horrible since they require memory re-loads. */
+#define gmx_simd_slli_fi gmx_simd_slli_di_sparc64_hpc_ace
+#define gmx_simd_srli_fi gmx_simd_srli_di_sparc64_hpc_ace
+#define gmx_simd_and_fi _fjsp_and_v2r8
+#define gmx_simd_andnot_fi(a, b) _fjsp_andnot1_v2r8(a, b)
+#define gmx_simd_or_fi _fjsp_or_v2r8
+#define gmx_simd_xor_fi _fjsp_xor_v2r8
+/* No integer arithmetic ops on gmx_simd_fint32_t */
+/* Boolean & comparison operations on gmx_simd_float_t */
+#define gmx_simd_fbool_t _fjsp_v2r8
+#define gmx_simd_cmpeq_f _fjsp_cmpeq_v2r8
+#define gmx_simd_cmplt_f _fjsp_cmplt_v2r8
+#define gmx_simd_cmple_f _fjsp_cmple_v2r8
+#define gmx_simd_and_fb _fjsp_and_v2r8
+#define gmx_simd_or_fb _fjsp_or_v2r8
+#define gmx_simd_anytrue_fb gmx_simd_anytrue_d_sparc64_hpc_ace
+#define gmx_simd_blendzero_f _fjsp_and_v2r8
+#define gmx_simd_blendnotzero_f(a, sel) _fjsp_andnot1_v2r8(sel, a)
+#define gmx_simd_blendv_f(a, b, s) _fjsp_selmov_v2r8(b, a, s)
+#define gmx_simd_reduce_f(a) gmx_simd_reduce_d_sparc64_hpc_ace(a)
+/* No boolean & comparison operations on gmx_simd_fint32_t */
+/* No conversions between different booleans */
+
+/****************************************************
+ * DOUBLE PRECISION SIMD IMPLEMENTATION *
+ ****************************************************/
+/* Map the generic GROMACS gmx_simd_*_d API onto the Fujitsu HPC-ACE
+ * _fjsp_*_v2r8 intrinsics (2-wide double-precision registers).
+ */
+#define gmx_simd_double_t _fjsp_v2r8
+#define gmx_simd_load_d _fjsp_load_v2r8
+#define gmx_simd_load1_d(m) _fjsp_set_v2r8((*m), (*m))
+#define gmx_simd_set1_d(a) _fjsp_set_v2r8(a, a)
+#define gmx_simd_store_d _fjsp_store_v2r8
+#define gmx_simd_loadu_d gmx_simd_load_d
+/* No unaligned store of gmx_simd_double_t */
+#define gmx_simd_setzero_d _fjsp_setzero_v2r8
+#define gmx_simd_add_d _fjsp_add_v2r8
+#define gmx_simd_sub_d _fjsp_sub_v2r8
+#define gmx_simd_mul_d _fjsp_mul_v2r8
+#define gmx_simd_fmadd_d(a, b, c) _fjsp_madd_v2r8(a, b, c)
+#define gmx_simd_fmsub_d(a, b, c) _fjsp_msub_v2r8(a, b, c)
+#define gmx_simd_fnmadd_d(a, b, c) _fjsp_nmsub_v2r8(a, b, c)
+#define gmx_simd_fnmsub_d(a, b, c) _fjsp_nmadd_v2r8(a, b, c)
+/* NOTE(review): fnmadd maps to _fjsp_nmsub and fnmsub to _fjsp_nmadd; the
+ * apparent cross-naming looks intentional (GROMACS fnmadd(a,b,c) = -a*b+c)
+ * but should be confirmed against the Fujitsu intrinsics manual.
+ */
+#define gmx_simd_and_d _fjsp_and_v2r8
+#define gmx_simd_andnot_d _fjsp_andnot1_v2r8
+#define gmx_simd_or_d _fjsp_or_v2r8
+#define gmx_simd_xor_d _fjsp_xor_v2r8
+#define gmx_simd_rsqrt_d(x) _fjsp_rsqrta_v2r8(x)
+#define gmx_simd_rcp_d(x) _fjsp_rcpa_v2r8(x)
+#define gmx_simd_fabs_d(x) _fjsp_abs_v2r8(x)
+#define gmx_simd_fneg_d(x) _fjsp_neg_v2r8(x)
+#define gmx_simd_max_d _fjsp_max_v2r8
+#define gmx_simd_min_d _fjsp_min_v2r8
+#define gmx_simd_round_d(x) gmx_simd_cvt_i2d(gmx_simd_cvt_d2i(x))
+#define gmx_simd_trunc_d(x) gmx_simd_cvt_i2d(gmx_simd_cvtt_d2i(x))
+#define gmx_simd_fraction_d(x) gmx_simd_sub_d(x, gmx_simd_trunc_d(x))
+#define gmx_simd_get_exponent_d gmx_simd_get_exponent_d_sparc64_hpc_ace
+#define gmx_simd_get_mantissa_d gmx_simd_get_mantissa_d_sparc64_hpc_ace
+#define gmx_simd_set_exponent_d gmx_simd_set_exponent_d_sparc64_hpc_ace
+/* integer datatype corresponding to double: gmx_simd_dint32_t */
+/* Note: these "int32" values live in the 64-bit lanes of floating-point
+ * registers; the helper functions below convert to/from int memory.
+ */
+#define gmx_simd_dint32_t _fjsp_v2r8
+#define gmx_simd_load_di(m) gmx_simd_load_di_sparc64_hpc_ace(m)
+#define gmx_simd_set1_di(i) gmx_simd_set1_di_sparc64_hpc_ace(i)
+#define gmx_simd_store_di(m, x) gmx_simd_store_di_sparc64_hpc_ace(m, x)
+#define gmx_simd_loadu_di gmx_simd_load_di
+/* No unaligned store of gmx_simd_dint32_t */
+#define gmx_simd_setzero_di _fjsp_setzero_v2r8
+#define gmx_simd_cvt_d2i gmx_simd_cvt_d2i_sparc64_hpc_ace
+#define gmx_simd_cvtt_d2i _fjsp_dtox_v2r8
+#define gmx_simd_cvt_i2d _fjsp_xtod_v2r8
+#define gmx_simd_extract_di gmx_simd_extract_di_sparc64_hpc_ace
+/* Integer logical ops on gmx_simd_dint32_t */
+#define gmx_simd_slli_di gmx_simd_slli_di_sparc64_hpc_ace
+#define gmx_simd_srli_di gmx_simd_srli_di_sparc64_hpc_ace
+#define gmx_simd_and_di _fjsp_and_v2r8
+#define gmx_simd_andnot_di _fjsp_andnot1_v2r8
+#define gmx_simd_or_di _fjsp_or_v2r8
+#define gmx_simd_xor_di _fjsp_xor_v2r8
+/* Integer arithmetic ops on integer datatype corresponding to double */
+/* Boolean & comparison operations on gmx_simd_double_t */
+#define gmx_simd_dbool_t _fjsp_v2r8
+#define gmx_simd_cmpeq_d _fjsp_cmpeq_v2r8
+#define gmx_simd_cmplt_d _fjsp_cmplt_v2r8
+#define gmx_simd_cmple_d _fjsp_cmple_v2r8
+#define gmx_simd_and_db _fjsp_and_v2r8
+#define gmx_simd_or_db _fjsp_or_v2r8
+#define gmx_simd_anytrue_db gmx_simd_anytrue_d_sparc64_hpc_ace
+#define gmx_simd_blendzero_d _fjsp_and_v2r8
+#define gmx_simd_blendnotzero_d(a, sel) _fjsp_andnot1_v2r8(sel, a)
+#define gmx_simd_blendv_d(a, b, sel) _fjsp_selmov_v2r8(b, a, sel)
+#define gmx_simd_reduce_d(a) gmx_simd_reduce_d_sparc64_hpc_ace(a)
+
+/* No boolean & comparison operations on gmx_simd_dint32_t */
+/* Float/double conversion */
+/* Identity conversions: "single precision" is carried in double-width
+ * registers on HPC-ACE (see gmx_simd_load_f_sparc64_hpc_ace).
+ */
+#define gmx_simd_cvt_f2d(f) (f)
+#define gmx_simd_cvt_d2f(d) (d)
+
+/****************************************************
+ * SINGLE PRECISION IMPLEMENTATION HELPER FUNCTIONS *
+ ****************************************************/
+/* Load two packed single-precision values from m and widen them to a
+ * double-width (v2r8) register, since "float" SIMD on HPC-ACE is carried
+ * in double registers.
+ */
+static gmx_inline gmx_simd_float_t
+gmx_simd_load_f_sparc64_hpc_ace(const float *m)
+{
+ /* We are not allowed to cast single-to-double registers, but we can
+ * masquerade the memory location as a variable of type _fjsp_v2r4.
+ */
+ const _fjsp_v2r4 *p = (const _fjsp_v2r4 *)m;
+ _fjsp_v2r4 simd;
+
+ simd = *p;
+ return _fjsp_stod_v2r8(simd);
+}
+
+/* Narrow a double-width register back to two packed single-precision
+ * values and store them at m (inverse of gmx_simd_load_f_sparc64_hpc_ace).
+ */
+static gmx_inline void
+gmx_simd_store_f_sparc64_hpc_ace(float *m, gmx_simd_float_t x)
+{
+ /* We are not allowed to cast single-to-double registers, but we can
+ * masquerade the memory location as a variable of type _fjsp_v2r4.
+ */
+ _fjsp_v2r4 *p = (_fjsp_v2r4 *)m;
+ *p = _fjsp_dtos_v2r4(x);
+}
+
+/* Load two 32-bit integers from m, widening each to a 64-bit lane via the
+ * union (the int -> long long assignment sign-extends), and return them
+ * bitwise in a floating-point register.
+ */
+static gmx_inline gmx_simd_dint32_t
+gmx_simd_load_di_sparc64_hpc_ace(const int *m)
+{
+ union
+ {
+ _fjsp_v2r8 simd;
+ long long int i[2];
+ }
+ conv;
+
+ conv.i[0] = m[0];
+ conv.i[1] = m[1];
+
+ return _fjsp_load_v2r8( (double *) &(conv.simd) );
+}
+
+/* Store the two 64-bit integer lanes of x as two 32-bit integers at m
+ * (inverse of gmx_simd_load_di_sparc64_hpc_ace). The long long -> int
+ * assignment truncates the upper 32 bits of each lane.
+ */
+static gmx_inline void
+gmx_simd_store_di_sparc64_hpc_ace(int *m, gmx_simd_dint32_t x)
+{
+ union
+ {
+ _fjsp_v2r8 simd;
+ long long int i[2];
+ }
+ conv;
+
+ _fjsp_store_v2r8( (double *) &(conv.simd), x );
+
+ m[0] = conv.i[0];
+ m[1] = conv.i[1];
+}
+
+/* Broadcast the 32-bit integer i into both 64-bit lanes of a register.
+ * NOTE(review): the union member i[2] shares its name with the parameter i;
+ * conv.i[0] = i is unambiguous, but renaming one would aid readability.
+ */
+static gmx_inline gmx_simd_dint32_t
+gmx_simd_set1_di_sparc64_hpc_ace(int i)
+{
+ union
+ {
+ _fjsp_v2r8 simd;
+ long long int i[2];
+ }
+ conv;
+
+ conv.i[0] = i;
+ conv.i[1] = i;
+
+ return _fjsp_load_v2r8( (double *) &(conv.simd) );
+}
+
+/* Extract lane i (0 = low, anything else = high) of x as a 32-bit int. */
+static gmx_inline int
+gmx_simd_extract_di_sparc64_hpc_ace(gmx_simd_dint32_t x, int i)
+{
+ long long int res;
+ /* This conditional should be optimized away at compile time */
+ if (i == 0)
+ {
+ _fjsp_storel_v2r8((double *)&res, x);
+ }
+ else
+ {
+ _fjsp_storeh_v2r8((double *)&res, x);
+ }
+ return (int)res;
+}
+
+/* Shift each 64-bit integer lane of x left by i bits.
+ * NOTE(review): the pointer casts between _fjsp_v2r8 and _fjsp_v2i8 are a
+ * type pun that violates ISO C strict aliasing; presumably well-defined
+ * with the Fujitsu compiler — confirm, or use a union as the other
+ * helpers in this file do.
+ */
+static gmx_inline gmx_simd_dint32_t
+gmx_simd_slli_di_sparc64_hpc_ace(gmx_simd_dint32_t x, int i)
+{
+ _fjsp_v2i8 ix = *((_fjsp_v2i8 *)&x);
+ ix = _fjsp_slli_v2i8(ix, i);
+ x = *((_fjsp_v2r8 *)&ix);
+ return x;
+}
+
+/* Shift each 64-bit integer lane of x right (logical, per the intrinsic
+ * name — confirm in the Fujitsu manual) by i bits. Same strict-aliasing
+ * caveat as gmx_simd_slli_di_sparc64_hpc_ace.
+ */
+static gmx_inline gmx_simd_dint32_t
+gmx_simd_srli_di_sparc64_hpc_ace(gmx_simd_dint32_t x, int i)
+{
+ _fjsp_v2i8 ix = *((_fjsp_v2i8 *)&x);
+ ix = _fjsp_srli_v2i8(ix, i);
+ x = *((_fjsp_v2r8 *)&ix);
+ return x;
+}
+
+/* Convert doubles to integers with rounding: add 0.5 carrying the sign of
+ * each element (signbit & x picks up the sign, OR merges it into 0.5), then
+ * use the truncating conversion. Implements round-half-away-from-zero.
+ */
+static gmx_inline gmx_simd_dint32_t
+gmx_simd_cvt_d2i_sparc64_hpc_ace(gmx_simd_double_t x)
+{
+ _fjsp_v2r8 signbit = _fjsp_set_v2r8(-0.0, -0.0);
+ _fjsp_v2r8 half = _fjsp_set_v2r8(0.5, 0.5);
+
+ x = _fjsp_add_v2r8(x, _fjsp_or_v2r8(_fjsp_and_v2r8(signbit, x), half));
+ return _fjsp_dtox_v2r8(x);
+}
+
+/* Return nonzero if any boolean lane of x is set: OR the high lane into
+ * the low lane, store the low lane, and test it against zero.
+ */
+static gmx_inline int
+gmx_simd_anytrue_d_sparc64_hpc_ace(gmx_simd_dbool_t x)
+{
+ long long int i;
+ x = _fjsp_or_v2r8(x, _fjsp_unpackhi_v2r8(x, x));
+ _fjsp_storel_v2r8((double *)&i, x);
+ return (i != 0LL);
+}
+
+/* Horizontal reduction: add the high lane onto the low lane and return
+ * the resulting scalar sum of the two elements.
+ */
+static gmx_inline double
+gmx_simd_reduce_d_sparc64_hpc_ace(gmx_simd_double_t x)
+{
+ double d;
+ x = _fjsp_add_v2r8(x, _fjsp_unpackhi_v2r8(x, x));
+ _fjsp_storel_v2r8(&d, x);
+ return d;
+}
+
+
+/* Extract the IEEE-754 exponent of each element as a double: mask out the
+ * 11 exponent bits, shift them down past the 52-bit mantissa, subtract the
+ * bias, then convert the resulting integer lanes to doubles.
+ */
+static gmx_inline gmx_simd_double_t
+gmx_simd_get_exponent_d_sparc64_hpc_ace(gmx_simd_double_t x)
+{
+ /* HPC-ACE cannot cast _fjsp_v2r8 to _fjsp_v4i4, so to perform shifts we
+ * would need to store and reload. Since we are only operating on two
+ * numbers it is likely more efficient to do the operations directly on
+ * normal registers.
+ */
+ const gmx_int64_t expmask = 0x7ff0000000000000LL;
+ const gmx_int64_t expbias = 1023LL;
+
+ union
+ {
+ _fjsp_v2r8 simd;
+ long long int i[2];
+ }
+ conv;
+
+ _fjsp_store_v2r8( (double *) &conv.simd, x);
+ conv.i[0] = ((conv.i[0] & expmask) >> 52) - expbias;
+ conv.i[1] = ((conv.i[1] & expmask) >> 52) - expbias;
+ x = _fjsp_load_v2r8( (double *) &conv.simd);
+ /* Lanes now hold integers bitwise; convert them to doubles. */
+ return _fjsp_xtod_v2r8(x);
+}
+
+/* Extract the mantissa of each element: mask away sign and exponent bits,
+ * then OR in the bit pattern of 1.0 so every result lies in [1, 2).
+ */
+static gmx_inline gmx_simd_double_t
+gmx_simd_get_mantissa_d_sparc64_hpc_ace(gmx_simd_double_t x)
+{
+ gmx_int64_t mantmask[2] = {0x000fffffffffffffLL, 0x000fffffffffffffLL};
+ gmx_simd_double_t one = _fjsp_set_v2r8(1.0, 1.0);
+
+ x = _fjsp_and_v2r8(x, _fjsp_load_v2r8((double *)mantmask));
+ return _fjsp_or_v2r8(x, one);
+}
+
+/* Return 2^round(x) per element: round x to the nearest integer (reusing
+ * gmx_simd_cvt_d2i_sparc64_hpc_ace), add the exponent bias, and shift the
+ * result into the 11-bit exponent field above the 52-bit mantissa.
+ */
+static gmx_inline gmx_simd_double_t
+gmx_simd_set_exponent_d_sparc64_hpc_ace(gmx_simd_double_t x)
+{
+ const gmx_int64_t expbias = 1023;
+ union
+ {
+ _fjsp_v2r8 simd;
+ long long int i[2];
+ }
+ conv;
+
+
+ _fjsp_store_v2r8( (double *) &conv.simd, gmx_simd_cvt_d2i_sparc64_hpc_ace(x));
+ conv.i[0] = (conv.i[0] + expbias) << 52;
+ conv.i[1] = (conv.i[1] + expbias) << 52;
+
+ return _fjsp_load_v2r8( (double *) &conv.simd);
+}
+
+
+/* No SIMD4 support, since both single & double are only 2-wide */
+
+#endif /* GMX_SIMD_IMPL_SPARC64_HPC_ACE_H */
# include "gromacs/simd/impl_x86_sse2/impl_x86_sse2.h"
#elif defined GMX_SIMD_IBM_QPX
# include "gromacs/simd/impl_ibm_qpx/impl_ibm_qpx.h"
+#elif defined GMX_SIMD_SPARC64_HPC_ACE
+# include "gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace.h"
#elif (defined GMX_SIMD_REFERENCE) || (defined DOXYGEN)
/* Plain C SIMD reference implementation, also serves as documentation.
* For now this code path will also be taken for Sparc64_HPC_ACE since we have
const gmx_simd_real_t rSimd_ExpDouble = setSimdRealFrom3R( 6.287393598732017379054414e+176,
8.794495252903116023030553e-140,
-3.637060701570496477655022e+202);
-// Magic FP numbers corresponding to specific bit patterns
-const gmx_simd_real_t rSimd_Bits1 = setSimdRealFrom1R(-1.07730874267432137e+236);
-const gmx_simd_real_t rSimd_Bits2 = setSimdRealFrom1R(-9.25596313493178307e+061);
-const gmx_simd_real_t rSimd_Bits3 = setSimdRealFrom1R(-8.57750588235293981e+003);
-const gmx_simd_real_t rSimd_Bits4 = setSimdRealFrom1R( 1.22416778341839096e-250);
-const gmx_simd_real_t rSimd_Bits5 = setSimdRealFrom1R(-1.15711777004554095e+294);
-const gmx_simd_real_t rSimd_Bits6 = setSimdRealFrom1R( 1.53063836115600621e-018);
-# else
-// Magic FP numbers corresponding to specific bit patterns
-const gmx_simd_real_t rSimd_Bits1 = setSimdRealFrom1R(-5.9654142337e+29);
-const gmx_simd_real_t rSimd_Bits2 = setSimdRealFrom1R(-1.0737417600e+08);
-const gmx_simd_real_t rSimd_Bits3 = setSimdRealFrom1R(-6.0235290527e+00);
-const gmx_simd_real_t rSimd_Bits4 = setSimdRealFrom1R( 1.0788832913e-31);
-const gmx_simd_real_t rSimd_Bits5 = setSimdRealFrom1R(-1.0508719529e+37);
-const gmx_simd_real_t rSimd_Bits6 = setSimdRealFrom1R( 1.1488970369e-02);
# endif
#endif // GMX_SIMD_HAVE_REAL
#ifdef GMX_SIMD_HAVE_INT32
const gmx_simd4_real_t rSimd_ExpDouble = setSimd4RealFrom3R( 6.287393598732017379054414e+176,
8.794495252903116023030553e-140,
-3.637060701570496477655022e+202);
-// Magic FP numbers corresponding to specific bit patterns
-const gmx_simd4_real_t rSimd4_Bits1 = setSimd4RealFrom1R(-1.07730874267432137e+236);
-const gmx_simd4_real_t rSimd4_Bits2 = setSimd4RealFrom1R(-9.25596313493178307e+061);
-const gmx_simd4_real_t rSimd4_Bits3 = setSimd4RealFrom1R(-8.57750588235293981e+003);
-const gmx_simd4_real_t rSimd4_Bits4 = setSimd4RealFrom1R( 1.22416778341839096e-250);
-const gmx_simd4_real_t rSimd4_Bits5 = setSimd4RealFrom1R(-1.15711777004554095e+294);
-const gmx_simd4_real_t rSimd4_Bits6 = setSimd4RealFrom1R( 1.53063836115600621e-018);
-# else
-const gmx_simd4_real_t rSimd4_Bits1 = setSimd4RealFrom1R(-5.9654142337e+29);
-const gmx_simd4_real_t rSimd4_Bits2 = setSimd4RealFrom1R(-1.0737417600e+08);
-const gmx_simd4_real_t rSimd4_Bits3 = setSimd4RealFrom1R(-6.0235290527e+00);
-const gmx_simd4_real_t rSimd4_Bits4 = setSimd4RealFrom1R( 1.0788832913e-31);
-const gmx_simd4_real_t rSimd4_Bits5 = setSimd4RealFrom1R(-1.0508719529e+37);
-const gmx_simd4_real_t rSimd4_Bits6 = setSimd4RealFrom1R( 1.1488970369e-02);
# endif
::std::vector<real>
}
#ifdef GMX_SIMD4_HAVE_LOGICAL
+/* 1.3333282470703125 has mantissa 0101010101010101 (followed by zeros)
+ * 1.79998779296875 has mantissa 1100110011001100 (followed by zeros)
+ * 1.26666259765625 has mantissa 0100010001000100 (followed by zeros)
+ * 1.8666534423828125 has mantissa 1101110111011101 (followed by zeros)
+ *
+ * Since all of them have the same exponent (2^0), the exponent will
+ * not change with AND or OR operations.
+ */
TEST_F(Simd4FloatingpointTest, gmxSimd4AndR)
{
- GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits3, gmx_simd4_and_r(rSimd4_Bits1, rSimd4_Bits2)); // Bits1 & Bits2 = Bits3
+ GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom1R(1.26666259765625),
+ gmx_simd4_and_r(gmx_simd4_set1_r(1.3333282470703125),
+ gmx_simd4_set1_r(1.79998779296875)));
}
-TEST_F(Simd4FloatingpointTest, gmxSimd4AndnotR)
+TEST_F(Simd4FloatingpointTest, gmxSimd4OrR)
{
- GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits4, gmx_simd4_andnot_r(rSimd4_Bits1, rSimd4_Bits2)); // (~Bits1) & Bits2 = Bits3
+ GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom1R(1.8666534423828125),
+ gmx_simd4_or_r(gmx_simd4_set1_r(1.3333282470703125),
+ gmx_simd4_set1_r(1.79998779296875)));
}
-TEST_F(Simd4FloatingpointTest, gmxSimd4OrR)
+TEST_F(Simd4FloatingpointTest, gmxSimd4XorR)
{
- GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits5, gmx_simd4_or_r(rSimd4_Bits1, rSimd4_Bits2)); // Bits1 | Bits2 = Bits3
+ /* Test xor by taking xor with a number and its negative. This should result
+ * in only the sign bit being set. We then use this bit to change the sign of
+ * different numbers.
+ */
+ gmx_simd4_real_t signbit = gmx_simd4_xor_r(gmx_simd4_set1_r(1.5), gmx_simd4_set1_r(-1.5));
+ GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom3R(-1, 2, -3), gmx_simd4_xor_r(signbit, setSimd4RealFrom3R(1, -2, 3)));
}
-TEST_F(Simd4FloatingpointTest, gmxSimd4XorR)
+TEST_F(Simd4FloatingpointTest, gmxSimd4AndnotR)
{
- GMX_EXPECT_SIMD4_REAL_EQ(rSimd4_Bits6, gmx_simd4_xor_r(rSimd4_Bits1, rSimd4_Bits2)); // Bits1 ^ Bits2 = Bits3
+ /* Use xor (which we already tested, so fix that first if both tests fail)
+ * to extract the sign bit, and then use andnot to take absolute values.
+ */
+ gmx_simd4_real_t signbit = gmx_simd4_xor_r(gmx_simd4_set1_r(1.5), gmx_simd4_set1_r(-1.5));
+ GMX_EXPECT_SIMD4_REAL_EQ(setSimd4RealFrom3R(1, 2, 3), gmx_simd4_andnot_r(signbit, setSimd4RealFrom3R(-1, 2, -3)));
}
+
#endif
TEST_F(Simd4FloatingpointTest, gmxSimd4MaxR)
}
#ifdef GMX_SIMD_HAVE_LOGICAL
+/* 1.3333282470703125 has mantissa 0101010101010101 (followed by zeros)
+ * 1.79998779296875 has mantissa 1100110011001100 (followed by zeros)
+ * 1.26666259765625 has mantissa 0100010001000100 (followed by zeros)
+ * 1.8666534423828125 has mantissa 1101110111011101 (followed by zeros)
+ *
+ * Since all of them have the same exponent (2^0), the exponent will
+ * not change with AND or OR operations.
+ */
TEST_F(SimdFloatingpointTest, gmxSimdAndR)
{
- GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits3, gmx_simd_and_r(rSimd_Bits1, rSimd_Bits2)); // Bits1 & Bits2 = Bits3
+ GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom1R(1.26666259765625),
+ gmx_simd_and_r(gmx_simd_set1_r(1.3333282470703125),
+ gmx_simd_set1_r(1.79998779296875)));
}
-TEST_F(SimdFloatingpointTest, gmxSimdAndnotR)
+TEST_F(SimdFloatingpointTest, gmxSimdOrR)
{
- GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits4, gmx_simd_andnot_r(rSimd_Bits1, rSimd_Bits2)); // (~Bits1) & Bits2 = Bits3
+ GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom1R(1.8666534423828125),
+ gmx_simd_or_r(gmx_simd_set1_r(1.3333282470703125),
+ gmx_simd_set1_r(1.79998779296875)));
}
-TEST_F(SimdFloatingpointTest, gmxSimdOrR)
+TEST_F(SimdFloatingpointTest, gmxSimdXorR)
{
- GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits5, gmx_simd_or_r(rSimd_Bits1, rSimd_Bits2)); // Bits1 | Bits2 = Bits3
+ /* Test xor by taking xor with a number and its negative. This should result
+ * in only the sign bit being set. We then use this bit to change the sign of
+ * different numbers.
+ */
+ gmx_simd_real_t signbit = gmx_simd_xor_r(gmx_simd_set1_r(1.5), gmx_simd_set1_r(-1.5));
+ GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(-1, 2, -3), gmx_simd_xor_r(signbit, setSimdRealFrom3R(1, -2, 3)));
}
-TEST_F(SimdFloatingpointTest, gmxSimdXorR)
+TEST_F(SimdFloatingpointTest, gmxSimdAndnotR)
{
- GMX_EXPECT_SIMD_REAL_EQ(rSimd_Bits6, gmx_simd_xor_r(rSimd_Bits1, rSimd_Bits2)); // Bits1 ^ Bits2 = Bits3
+ /* Use xor (which we already tested, so fix that first if both tests fail)
+ * to extract the sign bit, and then use andnot to take absolute values.
+ */
+ gmx_simd_real_t signbit = gmx_simd_xor_r(gmx_simd_set1_r(1.5), gmx_simd_set1_r(-1.5));
+ GMX_EXPECT_SIMD_REAL_EQ(setSimdRealFrom3R(1, 2, 3), gmx_simd_andnot_r(signbit, setSimdRealFrom3R(-1, 2, -3)));
}
+
#endif
TEST_F(SimdFloatingpointTest, gmxSimdMaxR)
{
gmx_incons("gmx_gethostname called with len<8");
}
-#if defined(HAVE_UNISTD_H) && !defined(__native_client__)
+#if defined(HAVE_UNISTD_H) && !defined(__native_client__) && !defined(__MINGW32__)
if (gethostname(name, len-1) != 0)
{
std::strncpy(name, "unknown", 8);
char *
gmx_ctime_r(const time_t *clock, char *buf, int n)
{
- char tmpbuf[STRLEN];
-
-#ifdef GMX_NATIVE_WINDOWS
+#ifdef _MSC_VER
 /* Windows */
- ctime_s( tmpbuf, STRLEN, clock );
+ /* NOTE(review): ctime_s requires the destination to hold at least 26
+ * characters — this assumes n >= 26; confirm all callers. */
+ ctime_s( buf, n, clock );
+#elif defined(GMX_NATIVE_WINDOWS)
+ /* MinGW has no ctime_s; fall back to plain ctime.
+ * NOTE(review): ctime is not reentrant and may return NULL for
+ * unrepresentable times, which would make strncpy crash — presumably
+ * acceptable here, but worth confirming. */
+ char *tmpbuf = ctime( clock );
+ strncpy(buf, tmpbuf, n-1);
+ buf[n-1] = '\0';
#elif (defined(__sun))
 /*Solaris*/
- ctime_r(clock, tmpbuf, n);
+ ctime_r(clock, buf, n);
#else
+ /* POSIX ctime_r writes into a caller buffer of at least 26 chars;
+ * use a scratch buffer so the copy into buf can be length-limited. */
+ char tmpbuf[STRLEN];
 ctime_r(clock, tmpbuf);
-#endif
 strncpy(buf, tmpbuf, n-1);
 buf[n-1] = '\0';
-
+#endif
 return buf;
}
#endif
#ifdef GMX_NATIVE_WINDOWS
+#include <windows.h>
#include <direct.h>
#include <io.h>
#endif
return ftello(stream);
#else
#ifdef HAVE__FSEEKI64
+#ifndef __MINGW32__
return _ftelli64(stream);
+#else
+ return ftello64(stream);
+#endif
#else
return ftell(stream);
#endif
/* Our own implementation of dirent-like functionality to scan directories. */
struct gmx_directory
{
-#ifdef HAVE_DIRENT_H
- DIR * dirent_handle;
-#elif (defined GMX_NATIVE_WINDOWS)
+#if defined(GMX_NATIVE_WINDOWS)
intptr_t windows_handle;
struct _finddata_t finddata;
int first;
+#elif defined(HAVE_DIRENT_H)
+ DIR * dirent_handle;
#else
int dummy;
#endif
*p_gmxdir = gmxdir;
-#ifdef HAVE_DIRENT_H
- if ( (gmxdir->dirent_handle = opendir(dirname)) != NULL)
- {
- rc = 0;
- }
- else
- {
- sfree(gmxdir);
- *p_gmxdir = NULL;
- rc = EINVAL;
- }
-#elif (defined GMX_NATIVE_WINDOWS)
-
+#if defined(GMX_NATIVE_WINDOWS)
if (dirname != NULL && strlen(dirname) > 0)
{
char * tmpname;
{
rc = EINVAL;
}
+#elif defined(HAVE_DIRENT_H)
+ if ( (gmxdir->dirent_handle = opendir(dirname)) != NULL)
+ {
+ rc = 0;
+ }
+ else
+ {
+ sfree(gmxdir);
+ *p_gmxdir = NULL;
+ rc = EINVAL;
+ }
#else
gmx_fatal(FARGS,
"Source compiled without POSIX dirent or windows support - cannot scan directories.\n"
{
int rc;
-#ifdef HAVE_DIRENT_H
+#if defined(GMX_NATIVE_WINDOWS)
+ if (gmxdir != NULL)
+ {
+ if (gmxdir->windows_handle <= 0)
+ {
+ name[0] = '\0';
+ rc = ENOENT;
+ }
+ else if (gmxdir->first == 1)
+ {
+ strncpy(name, gmxdir->finddata.name, maxlength_name);
+ rc = 0;
+ gmxdir->first = 0;
+ }
+ else
+ {
+ if (_findnext(gmxdir->windows_handle, &gmxdir->finddata) == 0)
+ {
+ strncpy(name, gmxdir->finddata.name, maxlength_name);
+ rc = 0;
+ }
+ else
+ {
+ name[0] = '\0';
+ rc = ENOENT;
+ }
+ }
+ }
+ else
+ {
+ name[0] = '\0';
+ rc = EINVAL;
+ }
+#elif defined(HAVE_DIRENT_H)
struct dirent * direntp_large;
struct dirent * p;
name[0] = '\0';
rc = EINVAL;
}
-
-#elif (defined GMX_NATIVE_WINDOWS)
-
- if (gmxdir != NULL)
- {
- if (gmxdir->windows_handle <= 0)
- {
-
- name[0] = '\0';
- rc = ENOENT;
- }
- else if (gmxdir->first == 1)
- {
- strncpy(name, gmxdir->finddata.name, maxlength_name);
- rc = 0;
- gmxdir->first = 0;
- }
- else
- {
- if (_findnext(gmxdir->windows_handle, &gmxdir->finddata) == 0)
- {
- strncpy(name, gmxdir->finddata.name, maxlength_name);
- rc = 0;
- }
- else
- {
- name[0] = '\0';
- rc = ENOENT;
- }
- }
- }
-
#else
gmx_fatal(FARGS,
"Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
gmx_directory_close(gmx_directory_t gmxdir)
{
int rc;
-#ifdef HAVE_DIRENT_H
- rc = (gmxdir != NULL) ? closedir(gmxdir->dirent_handle) : EINVAL;
-#elif (defined GMX_NATIVE_WINDOWS)
+#if defined(GMX_NATIVE_WINDOWS)
rc = (gmxdir != NULL) ? _findclose(gmxdir->windows_handle) : EINVAL;
+#elif defined(HAVE_DIRENT_H)
+ rc = (gmxdir != NULL) ? closedir(gmxdir->dirent_handle) : EINVAL;
#else
gmx_fatal(FARGS,
"Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
* \ingroup module_utility
*/
-/* We currently don't support MingW. And ICC also defines it */
-#ifdef _MSC_VER
+#if defined( _WIN32 ) || defined( _WIN64 )
#define GMX_NATIVE_WINDOWS
#endif
*/
static gmx_inline void gmx_pause()
{
-#ifndef GMX_NATIVE_WINDOWS
+#ifndef _MSC_VER
/* Ugly hack because the openmp implementation below hacks into the SIMD
* settings to decide when to use _mm_pause(). This should eventually be
* changed into proper detection of the intrinsics uses, not SIMD.
*/
-#if (defined GMX_SIMD_X86_SSE2) || (defined GMX_SIMD_X86_SSE4_1) || \
+#if ((defined GMX_SIMD_X86_SSE2) || (defined GMX_SIMD_X86_SSE4_1) || \
(defined GMX_SIMD_X86_AVX_128_FMA) || (defined GMX_SIMD_X86_AVX_256) || \
- (defined GMX_SIMD_X86_AVX2_256)
+ (defined GMX_SIMD_X86_AVX2_256)) && !defined(__MINGW32__)
/* Replace with tbb::internal::atomic_backoff when/if we use TBB */
_mm_pause();
#elif defined __MIC__
#ifdef WITH_DMALLOC
#include <dmalloc.h>
#endif
+#ifdef HAVE__ALIGNED_MALLOC
+#include <malloc.h>
+#endif
#include "thread_mpi/threads.h"
}
/* This routine can NOT be called with any pointer */
-void save_free_aligned(const char *name, const char *file, int line, void *ptr)
+void save_free_aligned(const char gmx_unused *name, const char gmx_unused *file, int gmx_unused line, void *ptr)
{
int i, j;
void *free = ptr;
// TODO fix this when we have an encapsulation layer for handling
// environment variables
#ifdef GMX_NATIVE_WINDOWS
- _putenv_s("GMX_MAXBACKUP", s_maxBackup.c_str());
+ _putenv(("GMX_MAXBACKUP="+s_maxBackup).c_str());
#else
setenv("GMX_MAXBACKUP", s_maxBackup.c_str(), true);
#endif