From: Erik Lindahl Date: Sun, 6 Jan 2013 13:50:23 +0000 (+0100) Subject: Fujitsu Sparc64 acceleration and general fixes for non-x86 builds X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=a73e7b4566a637fcb63ad89d8917982d5683c25b;p=alexxy%2Fgromacs.git Fujitsu Sparc64 acceleration and general fixes for non-x86 builds Fixes configurations not to assume x86 and avoid warnings, in particular if a non-x86 acceleration is used. The cpu detection code has been extended to parse /proc/cpuinfo on Linux in cases where the x86 CPUID instruction (or the inline assembly to execute it) is not available. Finally, there are new group kernels accelerated for use on the K computer, which uses the Sparc64 HPC-ACE instruction set. These kernels are roughly ~35% faster than the compiled C version, which means Gromacs-4.6 is now ~70% faster on K than Gromacs-4.5. Change-Id: I92559f0ac6159b504f100447a41a03e4b33fec19 --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 66da0e2453..a62a3a4efa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -183,7 +183,7 @@ if(NOT DEFINED GMX_CPU_ACCELERATION) endif(NOT DEFINED GMX_CPU_ACCELERATION) set(GMX_CPU_ACCELERATION "@GMX_SUGGESTED_CPU_ACCELERATION@" - CACHE STRING "Accelerated CPU kernels. Pick one of: None, SSE2, SSE4.1, AVX_128_FMA, AVX_256, IBM_QPX") + CACHE STRING "Accelerated CPU kernels. Pick one of: None, SSE2, SSE4.1, AVX_128_FMA, AVX_256, IBM_QPX, Sparc64_HPC_ACE") set(GMX_FFT_LIBRARY "fftw3" CACHE STRING "FFT library choices: fftw3,mkl,fftpack[built-in]") @@ -874,9 +874,10 @@ elseif(${GMX_CPU_ACCELERATION} STREQUAL "IBM_QPX") else() message(FATAL_ERROR "Cannot compile IBM QPX intrinsics without the XL compiler. If you are compiling for BlueGene/Q, use 'cmake .. 
-DCMAKE_TOOLCHAIN_FILE=BlueGeneQ-static-XL-C' to set up the tool chain.") endif() - +elseif(${GMX_CPU_ACCELERATION} STREQUAL "SPARC64_HPC_ACE") + set(GMX_CPU_ACCELERATION_SPARC64_HPC_ACE 1) else(${GMX_CPU_ACCELERATION} STREQUAL "NONE") - MESSAGE(FATAL_ERROR "Unrecognized option for accelerated kernels: ${GMX_CPU_ACCELERATION}. Pick one of None, SSE2, SSE4.1, AVX_128_FMA, AVX_256, IBM_QPX") + MESSAGE(FATAL_ERROR "Unrecognized option for accelerated kernels: ${GMX_CPU_ACCELERATION}. Pick one of None, SSE2, SSE4.1, AVX_128_FMA, AVX_256, IBM_QPX, Sparc64_HPC_ACE") endif(${GMX_CPU_ACCELERATION} STREQUAL "NONE") set(ACCELERATION_QUIETLY TRUE CACHE INTERNAL "") @@ -928,11 +929,11 @@ if(${GMX_FFT_LIBRARY} STREQUAL "FFTW3") set(GMX_FFT_FFTW3 1) - if (NOT ${GMX_CPU_ACCELERATION} STREQUAL "NONE" AND NOT ${FFTW}_HAVE_SIMD) + if ((${GMX_CPU_ACCELERATION} MATCHES "SSE" OR ${GMX_CPU_ACCELERATION} MATCHES "AVX") AND NOT ${FFTW}_HAVE_SIMD) message(WARNING "The fftw library found is compiled without SIMD support, which makes it slow. Consider recompiling it or contact your admin") endif() - if(NOT ${GMX_CPU_ACCELERATION} STREQUAL "NONE" AND ${FFTW}_HAVE_AVX) + if((${GMX_CPU_ACCELERATION} MATCHES "SSE" OR ${GMX_CPU_ACCELERATION} MATCHES "AVX") AND ${FFTW}_HAVE_AVX) # If we're not doing CPU acceleration, we don't care about FFTW performance on x86 either message(WARNING "The FFTW library was compiled with --enable-avx to enable AVX SIMD instructions. That might sound like a good idea for your processor, but for FFTW versions up to 3.3.3, these are slower than the SSE/SSE2 SIMD instructions for the way GROMACS uses FFTs. Limitations in the way FFTW allows GROMACS to measure performance make it awkward for either GROMACS or FFTW to make the decision for you based on runtime performance. You should compile a different FFTW library with --enable-sse or --enable-sse2. 
If you have a more recent FFTW, you may like to compare the performance of GROMACS with FFTW libraries compiled with and without --enable-avx. However, the GROMACS developers do not really expect the FFTW AVX optimization to help, because the performance is limited by memory access, not computation.") endif() diff --git a/cmake/Toolchain-Fujitsu-Sparc64-mpi.cmake b/cmake/Toolchain-Fujitsu-Sparc64-mpi.cmake new file mode 100644 index 0000000000..14c58b9ac6 --- /dev/null +++ b/cmake/Toolchain-Fujitsu-Sparc64-mpi.cmake @@ -0,0 +1,55 @@ +# +# This file is part of the GROMACS molecular simulation package. +# +# Copyright (c) 2012, by the GROMACS development team, led by +# David van der Spoel, Berk Hess, Erik Lindahl, and including many +# others, as listed in the AUTHORS file in the top-level source +# directory and at http://www.gromacs.org. +# +# GROMACS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 +# of the License, or (at your option) any later version. +# +# GROMACS is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with GROMACS; if not, see +# http://www.gnu.org/licenses, or write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# If you want to redistribute modifications to GROMACS, please +# consider that scientific software is very special. Version +# control is crucial - bugs must be traceable. We will be happy to +# consider code for inclusion in the official distribution, but +# derived work must not be called official GROMACS. 
Details are found +# in the README & COPYING files - if they are missing, get the +# official version at http://www.gromacs.org. +# +# To help us fund GROMACS development, we humbly ask that you cite +# the research papers on the package. Check out http://www.gromacs.org. +# +# the name of the target operating system +set(CMAKE_SYSTEM_NAME Linux CACHE STRING "Cross-compiling for Fujitsu Sparc64") + +set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS FALSE) + +# set the compiler +set(CMAKE_C_COMPILER mpifccpx) +set(CMAKE_CXX_COMPILER mpiFCCpx) +set(CMAKE_C_COMPILER_ID "Fujitsu" CACHE STRING "Prevent CMake from adding GNU-specific linker flags (-rdynamic)" FORCE) + +set(CMAKE_C_FLAGS "-Kopenmp -Kfast,reduction,swp,simd=2,uxsimd -x500 -Xg -DGMX_RELAXED_DOUBLE_PRECISION -w" CACHE STRING "Fujitsu Sparc64 C Flags" FORCE) +set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "Fujitsu Sparc64 C++ Flags" FORCE) +set(GMX_SOFTWARE_INVSQRT OFF CACHE BOOL "Use native 1.0/sqrt(x) on Fujitsu Sparc64" FORCE) + +set(GMX_THREAD_MPI OFF CACHE BOOL "Use real MPI instead" FORCE) +set(GMX_MPI ON CACHE BOOL "Use MPI library" FORCE) +set(GMX_DOUBLE ON CACHE BOOL "Use double by default on Fujitsu Sparc64 (due to HPC-ACE)" FORCE) +set(GMX_GPU OFF CACHE BOOL "Cannot do GPU acceleration on Fujitsu Sparc64" FORCE) +set(BUILD_SHARED_LIBS OFF CACHE BOOL "Use static linking by default on Fujitsu Sparc64" FORCE) + +set(GMX_CPU_ACCELERATION "Sparc64_HPC_ACE" CACHE STRING "Enabling Sparc64 HPC-ACE acceleration when using Fujitsu Sparc64 toolchain") diff --git a/cmake/Toolchain-Fujitsu-Sparc64.cmake b/cmake/Toolchain-Fujitsu-Sparc64.cmake new file mode 100644 index 0000000000..c76c4d9ac0 --- /dev/null +++ b/cmake/Toolchain-Fujitsu-Sparc64.cmake @@ -0,0 +1,54 @@ +# +# This file is part of the GROMACS molecular simulation package. 
+# +# Copyright (c) 2012, by the GROMACS development team, led by +# David van der Spoel, Berk Hess, Erik Lindahl, and including many +# others, as listed in the AUTHORS file in the top-level source +# directory and at http://www.gromacs.org. +# +# GROMACS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 +# of the License, or (at your option) any later version. +# +# GROMACS is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with GROMACS; if not, see +# http://www.gnu.org/licenses, or write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# If you want to redistribute modifications to GROMACS, please +# consider that scientific software is very special. Version +# control is crucial - bugs must be traceable. We will be happy to +# consider code for inclusion in the official distribution, but +# derived work must not be called official GROMACS. Details are found +# in the README & COPYING files - if they are missing, get the +# official version at http://www.gromacs.org. +# +# To help us fund GROMACS development, we humbly ask that you cite +# the research papers on the package. Check out http://www.gromacs.org. 
+# +# the name of the target operating system +set(CMAKE_SYSTEM_NAME Linux CACHE STRING "Cross-compiling for Fujitsu Sparc64") + +set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS FALSE) + +# set the compiler +set(CMAKE_C_COMPILER fccpx) +set(CMAKE_CXX_COMPILER FCCpx) +set(CMAKE_C_COMPILER_ID "Fujitsu" CACHE STRING "Prevent CMake from adding GNU-specific linker flags (-rdynamic)" FORCE) + +set(CMAKE_C_FLAGS "-Kopenmp -Kfast,reduction,swp,simd=2,uxsimd -x500 -Xg -DGMX_RELAXED_DOUBLE_PRECISION -w" CACHE STRING "Fujitsu Sparc64 C Flags" FORCE) +set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "Fujitsu Sparc64 C++ Flags" FORCE) +set(GMX_SOFTWARE_INVSQRT OFF CACHE BOOL "Use native 1.0/sqrt(x) on Fujitsu Sparc64" FORCE) + +# By default CMake will use thread-mpi +set(GMX_DOUBLE ON CACHE BOOL "Use double by default on Fujitsu Sparc64 (due to HPC-ACE)" FORCE) +set(GMX_GPU OFF CACHE BOOL "Cannot do GPU acceleration on Fujitsu Sparc64" FORCE) +set(BUILD_SHARED_LIBS OFF CACHE BOOL "Use static linking by default on Fujitsu Sparc64" FORCE) + +set(GMX_CPU_ACCELERATION "Sparc64_HPC_ACE" CACHE STRING "Enabling Sparc64 HPC-ACE acceleration when using Fujitsu Sparc64 toolchain") diff --git a/include/gmx_cpuid.h b/include/gmx_cpuid.h index 3b5beb65ed..8d3a968c46 100644 --- a/include/gmx_cpuid.h +++ b/include/gmx_cpuid.h @@ -54,6 +54,8 @@ enum gmx_cpuid_vendor GMX_CPUID_VENDOR_UNKNOWN, GMX_CPUID_VENDOR_INTEL, GMX_CPUID_VENDOR_AMD, + GMX_CPUID_VENDOR_FUJITSU, + GMX_CPUID_VENDOR_IBM, GMX_CPUID_NVENDORS }; @@ -127,6 +129,7 @@ enum gmx_cpuid_acceleration GMX_CPUID_ACCELERATION_X86_SSE4_1, GMX_CPUID_ACCELERATION_X86_AVX_128_FMA, GMX_CPUID_ACCELERATION_X86_AVX_256, + GMX_CPUID_ACCELERATION_SPARC64_HPC_ACE, GMX_CPUID_NACCELERATIONS }; diff --git a/src/config.h.cmakein b/src/config.h.cmakein index 2a632cbd0d..ffa22a1fab 100644 --- a/src/config.h.cmakein +++ b/src/config.h.cmakein @@ -123,6 +123,9 @@ /* IBM QPX was selected as CPU acceleration type (e.g. 
BlueGene/Q) */ #cmakedefine GMX_CPU_ACCELERATION_IBM_QPX +/* Fujitsu Sparc64 HPC-ACE SIMD acceleration */ +#cmakedefine GMX_CPU_ACCELERATION_SPARC64_HPC_ACE + /* String for CPU acceleration choice (for writing to log files and stdout) */ #define GMX_CPU_ACCELERATION_STRING "@GMX_CPU_ACCELERATION@" diff --git a/src/gmxlib/gmx_cpuid.c b/src/gmxlib/gmx_cpuid.c index 402af15f41..c17e809718 100644 --- a/src/gmxlib/gmx_cpuid.c +++ b/src/gmxlib/gmx_cpuid.c @@ -64,8 +64,11 @@ * in a single file, but to avoid repeated ifdefs we set the overall architecture here. */ #if defined (__i386__) || defined (__x86_64__) || defined (_M_IX86) || defined (_M_X64) +/* OK, it is x86, but can we execute cpuid? */ +#if defined(GMX_X86_GCC_INLINE_ASM) || ( defined(_MSC_VER) && ( (_MSC_VER > 1500) || (_MSC_VER==1500 && _MSC_FULL_VER >= 150030729))) # define GMX_CPUID_X86 #endif +#endif /* Global constant character strings corresponding to our enumerated types */ const char * @@ -74,7 +77,9 @@ gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] = "CannotDetect", "Unknown", "GenuineIntel", - "AuthenticAMD" + "AuthenticAMD", + "Fujitsu", + "IBM" }; const char * @@ -125,7 +130,8 @@ gmx_cpuid_acceleration_string[GMX_CPUID_NACCELERATIONS] = "SSE2", "SSE4.1", "AVX_128_FMA", - "AVX_256" + "AVX_256", + "Sparc64 HPC-ACE" }; /* Max length of brand string */ @@ -223,6 +229,10 @@ enum gmx_cpuid_acceleration static const enum gmx_cpuid_acceleration compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE2; +#elif defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE +static const +enum gmx_cpuid_acceleration + compiled_acc = GMX_CPUID_ACCELERATION_SPARC64_HPC_ACE; #else static const enum gmx_cpuid_acceleration @@ -696,6 +706,48 @@ cpuid_check_intel_x86(gmx_cpuid_t cpuid) +/* Copy the text before the first ':' in 'in' to 's' (a buffer of 'maxlength' bytes), dropping trailing whitespace. 's' is set to "" when no colon is found. */ +static void +chomp_substring_before_colon(const char *in, char *s, int maxlength) +{ + char *p; + strncpy(s,in,maxlength); s[maxlength-1]='\0'; /* strncpy() does not terminate on truncation */ + p = strchr(s,':'); + if(p!=NULL) + { + *p='\0'; + while((p>s) && isspace((unsigned char)*(p-1))) /* test the bound first: never read s[-1] */ + { + *(--p)='\0'; + } + } + else + { + *s='\0'; + } +} + 
+static void +chomp_substring_after_colon(const char *in, char *s, int maxlength) +{ /* Copy the text after the first ':' in 'in' to 's' (a buffer of 'maxlength' bytes), trimmed of surrounding whitespace; 's' is set to "" when there is no colon. */ + char *p; + if( (p = strchr(in,':'))!=NULL) + { + p++; + while(isspace((unsigned char)*p)) p++; + strncpy(s,p,maxlength); s[maxlength-1]='\0'; /* strncpy() does not terminate on truncation */ + p = s+strlen(s); + while((p>s) && isspace((unsigned char)*(p-1))) /* test the bound first: never read s[-1] */ + { + *(--p)='\0'; + } + } + else + { + *s='\0'; + } +} + /* Try to find the vendor of the current CPU, so we know what specific * detection routine to call. */ @@ -706,6 +758,8 @@ cpuid_check_vendor(void) /* Register data used on x86 */ unsigned int eax, ebx, ecx, edx; char vendorstring[13]; + FILE * fp; + char buffer[255],buffer2[255]; /* Set default first */ vendor = GMX_CPUID_VENDOR_UNKNOWN; @@ -726,6 +780,29 @@ cpuid_check_vendor(void) vendor = i; } } +#elif defined(__linux__) || defined(__linux) + /* General Linux. Try to get CPU vendor from /proc/cpuinfo */ + if( (fp = fopen("/proc/cpuinfo","r")) != NULL) + { + while( (vendor == GMX_CPUID_VENDOR_UNKNOWN) && (fgets(buffer,sizeof(buffer),fp) != NULL)) + { + chomp_substring_before_colon(buffer,buffer2,sizeof(buffer2)); + /* Intel/AMD use "vendor_id", IBM "vendor". Fujitsu "manufacture". Add others if you have them! */ + if( !strcmp(buffer2,"vendor_id") || !strcmp(buffer2,"vendor") || !strcmp(buffer2,"manufacture") ) + { + chomp_substring_after_colon(buffer,buffer2,sizeof(buffer2)); + for(i=GMX_CPUID_VENDOR_UNKNOWN; ifeature[i] = 0; } + cpuid->have_cpu_topology = 0; cpuid->nproc = 0; cpuid->npackages = 0; @@ -826,20 +907,37 @@ gmx_cpuid_init (gmx_cpuid_t * pcpuid) break; #endif default: - /* Could not find vendor */ - strncpy(cpuid->brand, "Unknown CPU brand", GMX_CPUID_BRAND_MAXLEN); + /* Default value */ + strncpy(cpuid->brand,"Unknown CPU brand",GMX_CPUID_BRAND_MAXLEN); +#if defined(__linux__) || defined(__linux) + /* General Linux. 
Try to get CPU type from /proc/cpuinfo */ + if( (fp = fopen("/proc/cpuinfo","r")) != NULL) + { + found_brand = 0; + while( (found_brand==0) && (fgets(buffer,sizeof(buffer),fp) !=NULL)) + { + chomp_substring_before_colon(buffer,buffer2,sizeof(buffer2)); + /* Intel uses "model name", Fujitsu and IBM "cpu". */ + if( !strcmp(buffer2,"model name") || !strcmp(buffer2,"cpu")) + { + chomp_substring_after_colon(buffer,cpuid->brand,GMX_CPUID_BRAND_MAXLEN); + found_brand = 1; + } + } + fclose(fp); /* close only a stream that was actually opened; fclose(NULL) is undefined */ + } +#endif cpuid->family = 0; cpuid->model = 0; cpuid->stepping = 0; - - for (i = 0; i < GMX_CPUID_NFEATURES; i++) + + for(i=0; i<GMX_CPUID_NFEATURES; i++) { - cpuid->feature[i] = 0; + cpuid->feature[i]=0; } cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1; break; } - return 0; } @@ -950,7 +1048,13 @@ gmx_cpuid_acceleration_suggest (gmx_cpuid_t cpuid) tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2; } } - + else if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_FUJITSU) + { + if(strstr(gmx_cpuid_brand(cpuid),"SPARC64")) + { + tmpacc = GMX_CPUID_ACCELERATION_SPARC64_HPC_ACE; + } + } return tmpacc; } @@ -1001,7 +1105,6 @@ gmx_cpuid_acceleration_check(gmx_cpuid_t cpuid, } - #ifdef GMX_CPUID_STANDALONE /* Stand-alone program to enable queries of CPU features from Cmake. 
* Note that you need to check inline ASM capabilities before compiling and set diff --git a/src/gmxlib/nonbonded/CMakeLists.txt b/src/gmxlib/nonbonded/CMakeLists.txt index 02dc83e401..e14de70666 100644 --- a/src/gmxlib/nonbonded/CMakeLists.txt +++ b/src/gmxlib/nonbonded/CMakeLists.txt @@ -35,40 +35,45 @@ # Sources that should always be built file(GLOB NONBONDED_SOURCES *.c nb_kernel_c/*.c) -if(GMX_CPU_ACCELERATION STREQUAL "SSE2" AND NOT GMX_DOUBLE) +if("${GMX_CPU_ACCELERATION}" STREQUAL "SSE2" AND NOT GMX_DOUBLE) file(GLOB NONBONDED_SSE2_SINGLE_SOURCES nb_kernel_sse2_single/*.c) endif() -if(GMX_CPU_ACCELERATION STREQUAL "SSE4.1" AND NOT GMX_DOUBLE) +if("${GMX_CPU_ACCELERATION}" STREQUAL "SSE4.1" AND NOT GMX_DOUBLE) file(GLOB NONBONDED_SSE4_1_SINGLE_SOURCES nb_kernel_sse4_1_single/*.c) endif() -if(GMX_CPU_ACCELERATION STREQUAL "AVX_128_FMA" AND NOT GMX_DOUBLE) +if("${GMX_CPU_ACCELERATION}" STREQUAL "AVX_128_FMA" AND NOT GMX_DOUBLE) file(GLOB NONBONDED_AVX_128_FMA_SINGLE_SOURCES nb_kernel_avx_128_fma_single/*.c) endif() -if(GMX_CPU_ACCELERATION STREQUAL "AVX_256" AND NOT GMX_DOUBLE) +if("${GMX_CPU_ACCELERATION}" STREQUAL "AVX_256" AND NOT GMX_DOUBLE) file(GLOB NONBONDED_AVX_256_SINGLE_SOURCES nb_kernel_avx_256_single/*.c) endif() -if(GMX_CPU_ACCELERATION STREQUAL "SSE2" AND GMX_DOUBLE) +if("${GMX_CPU_ACCELERATION}" STREQUAL "SSE2" AND GMX_DOUBLE) file(GLOB NONBONDED_SSE2_DOUBLE_SOURCES nb_kernel_sse2_double/*.c) endif() -if(GMX_CPU_ACCELERATION STREQUAL "SSE4.1" AND GMX_DOUBLE) +if("${GMX_CPU_ACCELERATION}" STREQUAL "SSE4.1" AND GMX_DOUBLE) file(GLOB NONBONDED_SSE4_1_DOUBLE_SOURCES nb_kernel_sse4_1_double/*.c) endif() -if(GMX_CPU_ACCELERATION STREQUAL "AVX_128_FMA" AND GMX_DOUBLE) +if("${GMX_CPU_ACCELERATION}" STREQUAL "AVX_128_FMA" AND GMX_DOUBLE) file(GLOB NONBONDED_AVX_128_FMA_DOUBLE_SOURCES nb_kernel_avx_128_fma_double/*.c) endif() -if(GMX_CPU_ACCELERATION STREQUAL "AVX_256" AND GMX_DOUBLE) +if("${GMX_CPU_ACCELERATION}" STREQUAL "AVX_256" AND GMX_DOUBLE) 
file(GLOB NONBONDED_AVX_256_DOUBLE_SOURCES nb_kernel_avx_256_double/*.c) endif() +if(("${GMX_CPU_ACCELERATION}" STREQUAL "Sparc64_HPC_ACE" OR "${GMX_CPU_ACCELERATION}" STREQUAL "SPARC64_HPC_ACE") AND GMX_DOUBLE) # accept the documented spelling and the upper-case form tested in the top-level CMakeLists.txt + file(GLOB NONBONDED_SPARC64_HPC_ACE_DOUBLE_SOURCES nb_kernel_sparc64_hpc_ace_double/*.c) +endif() + + # These sources will be used in the parent directory's CMakeLists.txt -set(NONBONDED_SOURCES ${NONBONDED_SOURCES} ${NONBONDED_SSE2_SINGLE_SOURCES} ${NONBONDED_SSE4_1_SINGLE_SOURCES} ${NONBONDED_AVX_128_FMA_SINGLE_SOURCES} ${NONBONDED_AVX_256_SINGLE_SOURCES} ${NONBONDED_SSE2_DOUBLE_SOURCES} ${NONBONDED_SSE4_1_DOUBLE_SOURCES} ${NONBONDED_AVX_128_FMA_DOUBLE_SOURCES} ${NONBONDED_AVX_256_DOUBLE_SOURCES} PARENT_SCOPE) +set(NONBONDED_SOURCES ${NONBONDED_SOURCES} ${NONBONDED_SSE2_SINGLE_SOURCES} ${NONBONDED_SSE4_1_SINGLE_SOURCES} ${NONBONDED_AVX_128_FMA_SINGLE_SOURCES} ${NONBONDED_AVX_256_SINGLE_SOURCES} ${NONBONDED_SSE2_DOUBLE_SOURCES} ${NONBONDED_SSE4_1_DOUBLE_SOURCES} ${NONBONDED_AVX_128_FMA_DOUBLE_SOURCES} ${NONBONDED_AVX_256_DOUBLE_SOURCES} ${NONBONDED_SPARC64_HPC_ACE_DOUBLE_SOURCES} PARENT_SCOPE) diff --git a/src/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h b/src/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h new file mode 100644 index 0000000000..dfd38394f3 --- /dev/null +++ b/src/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h @@ -0,0 +1,945 @@ +/* + * This source code is part of + * + * G R O M A C S + * + * Copyright (c) 2011-2012, The GROMACS Development Team + * + * Gromacs is a library for molecular simulation and trajectory analysis, + * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for + * a full list of developers and information, check out http://www.gromacs.org + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 
2 of the License, or (at your option) any + * later version. + * As a special exception, you may use this file as part of a free software + * library without restriction. Specifically, if other files instantiate + * templates or use macros or inline functions from this file, or you compile + * this file and link it with other files to produce an executable, this + * file does not by itself cause the resulting executable to be covered by + * the GNU Lesser General Public License. + * + * In plain-speak: do not worry about classes/macros/templates either - only + * changes to the library have to be LGPL, not an application linking with it. + * + * To help fund GROMACS development, we humbly ask that you cite + * the papers people have written on it - you can find them on the website! + */ +#ifndef _kernelutil_sparc64_hpc_ace_double_h_ +#define _kernelutil_sparc64_hpc_ace_double_h_ + +/* Fujitsu header borrows the name from SSE2, since some instructions have aliases */ +#include "emmintrin.h" + +#define GMX_FJSP_SHUFFLE2(x,y) (((x)<<1) | (y)) + +#define GMX_FJSP_TRANSPOSE2_V2R8(row0, row1) { \ + _fjsp_v2r8 __gmx_t1 = row0; \ + row0 = _fjsp_unpacklo_v2r8(row0,row1); \ + row1 = _fjsp_unpackhi_v2r8(__gmx_t1,row1); \ +} + + +static void +gmx_fjsp_print_v2r8(const char *s, _fjsp_v2r8 a) +{ + double lo,hi; + + _fjsp_storel_v2r8(&lo,a); + _fjsp_storeh_v2r8(&hi,a); + printf("%s: %g %g\n",s,lo,hi); +} + + +static _fjsp_v2r8 +gmx_fjsp_set1_v2r8(double d) +{ + return _fjsp_set_v2r8(d,d); +} + +static _fjsp_v2r8 +gmx_fjsp_load1_v2r8(const double * gmx_restrict ptr) +{ + return gmx_fjsp_set1_v2r8(*ptr); +} + + +static int +gmx_fjsp_any_lt_v2r8(_fjsp_v2r8 a, _fjsp_v2r8 b) +{ + union + { + double d; + long long int i; + } + conv; + + a = _fjsp_cmplt_v2r8(a,b); + a = _fjsp_or_v2r8(a, _fjsp_unpackhi_v2r8(a,a)); + _fjsp_storel_v2r8(&(conv.d),a); + return (conv.i != 0); +} + +/* 1.0/sqrt(x) */ +static gmx_inline _fjsp_v2r8 +gmx_fjsp_invsqrt_v2r8(_fjsp_v2r8 x) +{ + const _fjsp_v2r8 half 
= gmx_fjsp_set1_v2r8(0.5); + const _fjsp_v2r8 three = gmx_fjsp_set1_v2r8(3.0); + _fjsp_v2r8 lu = _fjsp_rsqrta_v2r8(x); + + lu = _fjsp_mul_v2r8(_fjsp_mul_v2r8(half,lu),_fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu,lu),x,three)); + /* The HPC-ACE instruction set is only available in double precision, while + * single precision is typically sufficient for Gromacs. If you define + * "GMX_RELAXED_DOUBLE_PRECISION" during compile, we stick to two Newton-Raphson + * iterations and accept 32bits of accuracy in 1.0/sqrt(x) and 1.0/x, rather than full + * double precision (53 bits). This is still clearly higher than single precision (24 bits). + */ +#ifndef GMX_RELAXED_DOUBLE_PRECISION + lu = _fjsp_mul_v2r8(_fjsp_mul_v2r8(half,lu),_fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu,lu),x,three)); +#endif + return _fjsp_mul_v2r8(_fjsp_mul_v2r8(half,lu),_fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu,lu),x,three)); +} + + +/* 1.0/x */ +static gmx_inline _fjsp_v2r8 +gmx_fjsp_inv_v2r8(_fjsp_v2r8 x) +{ + const _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + _fjsp_v2r8 lu = _fjsp_rcpa_v2r8(x); + + /* Perform three N-R steps for double precision (two with GMX_RELAXED_DOUBLE_PRECISION) */ + lu = _fjsp_mul_v2r8(lu,_fjsp_nmsub_v2r8(lu,x,two)); + /* The HPC-ACE instruction set is only available in double precision, while + * single precision is typically sufficient for Gromacs. If you define + * "GMX_RELAXED_DOUBLE_PRECISION" during compile, we stick to two Newton-Raphson + * iterations and accept 32bits of accuracy in 1.0/sqrt(x) and 1.0/x, rather than full + * double precision (53 bits). This is still clearly higher than single precision (24 bits). 
+ */ +#ifndef GMX_RELAXED_DOUBLE_PRECISION + lu = _fjsp_mul_v2r8(lu,_fjsp_nmsub_v2r8(lu,x,two)); +#endif + return _fjsp_mul_v2r8(lu,_fjsp_nmsub_v2r8(lu,x,two)); +} + + +static gmx_inline _fjsp_v2r8 +gmx_fjsp_calc_rsq_v2r8(_fjsp_v2r8 dx, _fjsp_v2r8 dy, _fjsp_v2r8 dz) +{ + return _fjsp_madd_v2r8(dx,dx,_fjsp_madd_v2r8(dy,dy,_fjsp_mul_v2r8(dz,dz))); +} + +/* Normal sum of four v2r8 registers */ +#define gmx_fjsp_sum4_v2r8(t0,t1,t2,t3) _fjsp_add_v2r8(_fjsp_add_v2r8(t0,t1),_fjsp_add_v2r8(t2,t3)) + + + + + +static _fjsp_v2r8 +gmx_fjsp_load_2real_swizzle_v2r8(const double * gmx_restrict ptrA, + const double * gmx_restrict ptrB) +{ + return _fjsp_unpacklo_v2r8(_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA),_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrB)); +} + +static _fjsp_v2r8 +gmx_fjsp_load_1real_v2r8(const double * gmx_restrict ptrA) +{ + return _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA); +} + + +static void +gmx_fjsp_store_2real_swizzle_v2r8(double * gmx_restrict ptrA, + double * gmx_restrict ptrB, + _fjsp_v2r8 xmm1) +{ + _fjsp_v2r8 t2; + + t2 = _fjsp_unpackhi_v2r8(xmm1,xmm1); + _fjsp_storel_v2r8(ptrA,xmm1); + _fjsp_storel_v2r8(ptrB,t2); +} + +static void +gmx_fjsp_store_1real_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 xmm1) +{ + _fjsp_storel_v2r8(ptrA,xmm1); +} + + +/* Similar to store, but increments value in memory */ +static void +gmx_fjsp_increment_2real_swizzle_v2r8(double * gmx_restrict ptrA, + double * gmx_restrict ptrB, _fjsp_v2r8 xmm1) +{ + _fjsp_v2r8 t1; + + t1 = _fjsp_unpackhi_v2r8(xmm1,xmm1); + xmm1 = _fjsp_add_v2r8(xmm1,_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA)); + t1 = _fjsp_add_v2r8(t1,_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrB)); + _fjsp_storel_v2r8(ptrA,xmm1); + _fjsp_storel_v2r8(ptrB,t1); +} + +static void +gmx_fjsp_increment_1real_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 xmm1) +{ + _fjsp_v2r8 tmp; + + tmp = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA); + tmp = _fjsp_add_v2r8(tmp,xmm1); + _fjsp_storel_v2r8(ptrA,tmp); +} + + + +static gmx_inline void 
+gmx_fjsp_load_2pair_swizzle_v2r8(const double * gmx_restrict p1, + const double * gmx_restrict p2, + _fjsp_v2r8 * gmx_restrict c6, + _fjsp_v2r8 * gmx_restrict c12) +{ + _fjsp_v2r8 t1,t2,t3; + + /* The c6/c12 array should be aligned */ + t1 = _fjsp_load_v2r8(p1); + t2 = _fjsp_load_v2r8(p2); + *c6 = _fjsp_unpacklo_v2r8(t1,t2); + *c12 = _fjsp_unpackhi_v2r8(t1,t2); +} + +static gmx_inline void +gmx_fjsp_load_1pair_swizzle_v2r8(const double * gmx_restrict p1, + _fjsp_v2r8 * gmx_restrict c6, + _fjsp_v2r8 * gmx_restrict c12) +{ + *c6 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1); + *c12 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+1); +} + + +static gmx_inline void +gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift, + const double * gmx_restrict xyz, + _fjsp_v2r8 * gmx_restrict x1, + _fjsp_v2r8 * gmx_restrict y1, + _fjsp_v2r8 * gmx_restrict z1) +{ + _fjsp_v2r8 mem_xy,mem_z,mem_sxy,mem_sz; + + mem_xy = _fjsp_load_v2r8(xyz); + mem_z = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),xyz+2); + mem_sxy = _fjsp_load_v2r8(xyz_shift); + mem_sz = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),xyz_shift+2); + + mem_xy = _fjsp_add_v2r8(mem_xy,mem_sxy); + mem_z = _fjsp_add_v2r8(mem_z,mem_sz); + + *x1 = _fjsp_shuffle_v2r8(mem_xy,mem_xy,GMX_FJSP_SHUFFLE2(0,0)); + *y1 = _fjsp_shuffle_v2r8(mem_xy,mem_xy,GMX_FJSP_SHUFFLE2(1,1)); + *z1 = _fjsp_shuffle_v2r8(mem_z,mem_z,GMX_FJSP_SHUFFLE2(0,0)); +} + + +static gmx_inline void +gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift, + const double * gmx_restrict xyz, + _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, + _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, + _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3) +{ + _fjsp_v2r8 t1,t2,t3,t4,t5,sxy,sz,szx,syz; + + t1 = _fjsp_load_v2r8(xyz); + t2 = _fjsp_load_v2r8(xyz+2); + t3 = _fjsp_load_v2r8(xyz+4); + t4 = _fjsp_load_v2r8(xyz+6); 
+ t5 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),xyz+8); + + sxy = _fjsp_load_v2r8(xyz_shift); + sz = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),xyz_shift+2); + szx = _fjsp_shuffle_v2r8(sz,sxy,GMX_FJSP_SHUFFLE2(0,0)); + syz = _fjsp_shuffle_v2r8(sxy,sz,GMX_FJSP_SHUFFLE2(0,1)); + + t1 = _fjsp_add_v2r8(t1,sxy); + t2 = _fjsp_add_v2r8(t2,szx); + t3 = _fjsp_add_v2r8(t3,syz); + t4 = _fjsp_add_v2r8(t4,sxy); + t5 = _fjsp_add_v2r8(t5,sz); + + *x1 = _fjsp_shuffle_v2r8(t1,t1,GMX_FJSP_SHUFFLE2(0,0)); + *y1 = _fjsp_shuffle_v2r8(t1,t1,GMX_FJSP_SHUFFLE2(1,1)); + *z1 = _fjsp_shuffle_v2r8(t2,t2,GMX_FJSP_SHUFFLE2(0,0)); + *x2 = _fjsp_shuffle_v2r8(t2,t2,GMX_FJSP_SHUFFLE2(1,1)); + *y2 = _fjsp_shuffle_v2r8(t3,t3,GMX_FJSP_SHUFFLE2(0,0)); + *z2 = _fjsp_shuffle_v2r8(t3,t3,GMX_FJSP_SHUFFLE2(1,1)); + *x3 = _fjsp_shuffle_v2r8(t4,t4,GMX_FJSP_SHUFFLE2(0,0)); + *y3 = _fjsp_shuffle_v2r8(t4,t4,GMX_FJSP_SHUFFLE2(1,1)); + *z3 = _fjsp_shuffle_v2r8(t5,t5,GMX_FJSP_SHUFFLE2(0,0)); +} + + +static gmx_inline void +gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift, + const double * gmx_restrict xyz, + _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, + _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, + _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3, + _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4) +{ + _fjsp_v2r8 t1,t2,t3,t4,t5,t6,sxy,sz,szx,syz; + + t1 = _fjsp_load_v2r8(xyz); + t2 = _fjsp_load_v2r8(xyz+2); + t3 = _fjsp_load_v2r8(xyz+4); + t4 = _fjsp_load_v2r8(xyz+6); + t5 = _fjsp_load_v2r8(xyz+8); + t6 = _fjsp_load_v2r8(xyz+10); + + sxy = _fjsp_load_v2r8(xyz_shift); + sz = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),xyz_shift+2); + szx = _fjsp_shuffle_v2r8(sz,sxy,GMX_FJSP_SHUFFLE2(0,0)); + syz = _fjsp_shuffle_v2r8(sxy,sz,GMX_FJSP_SHUFFLE2(0,1)); + + t1 = _fjsp_add_v2r8(t1,sxy); + t2 = _fjsp_add_v2r8(t2,szx); + t3 = 
_fjsp_add_v2r8(t3,syz); + t4 = _fjsp_add_v2r8(t4,sxy); + t5 = _fjsp_add_v2r8(t5,szx); + t6 = _fjsp_add_v2r8(t6,syz); + + *x1 = _fjsp_shuffle_v2r8(t1,t1,GMX_FJSP_SHUFFLE2(0,0)); + *y1 = _fjsp_shuffle_v2r8(t1,t1,GMX_FJSP_SHUFFLE2(1,1)); + *z1 = _fjsp_shuffle_v2r8(t2,t2,GMX_FJSP_SHUFFLE2(0,0)); + *x2 = _fjsp_shuffle_v2r8(t2,t2,GMX_FJSP_SHUFFLE2(1,1)); + *y2 = _fjsp_shuffle_v2r8(t3,t3,GMX_FJSP_SHUFFLE2(0,0)); + *z2 = _fjsp_shuffle_v2r8(t3,t3,GMX_FJSP_SHUFFLE2(1,1)); + *x3 = _fjsp_shuffle_v2r8(t4,t4,GMX_FJSP_SHUFFLE2(0,0)); + *y3 = _fjsp_shuffle_v2r8(t4,t4,GMX_FJSP_SHUFFLE2(1,1)); + *z3 = _fjsp_shuffle_v2r8(t5,t5,GMX_FJSP_SHUFFLE2(0,0)); + *x4 = _fjsp_shuffle_v2r8(t5,t5,GMX_FJSP_SHUFFLE2(1,1)); + *y4 = _fjsp_shuffle_v2r8(t6,t6,GMX_FJSP_SHUFFLE2(0,0)); + *z4 = _fjsp_shuffle_v2r8(t6,t6,GMX_FJSP_SHUFFLE2(1,1)); +} + + + +static gmx_inline void +gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1, + _fjsp_v2r8 * gmx_restrict x, _fjsp_v2r8 * gmx_restrict y, _fjsp_v2r8 * gmx_restrict z) +{ + *x = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1); + *y = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+1); + *z = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+2); +} + +static gmx_inline void +gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1, + _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, + _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, + _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3) +{ + *x1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1); + *y1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+1); + *z1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+2); + *x2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+3); + *y2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+4); + *z2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+5); + *x3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+6); + *y3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+7); + *z3 = 
_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+8); +} + +static gmx_inline void +gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1, + _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, + _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, + _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3, + _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4) +{ + *x1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1); + *y1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+1); + *z1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+2); + *x2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+3); + *y2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+4); + *z2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+5); + *x3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+6); + *y3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+7); + *z3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+8); + *x4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+9); + *y4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+10); + *z4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),p1+11); +} + + +static gmx_inline void +gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA, + const double * gmx_restrict ptrB, + _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1) +{ + _fjsp_v2r8 t1,t2,t3,t4; + t1 = _fjsp_load_v2r8(ptrA); + t2 = _fjsp_load_v2r8(ptrB); + t3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA+2); + t4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrB+2); + GMX_FJSP_TRANSPOSE2_V2R8(t1,t2); + *x1 = t1; + *y1 = t2; + *z1 = _fjsp_unpacklo_v2r8(t3,t4); +} + +static gmx_inline void +gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA, const double * gmx_restrict ptrB, + _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, + _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, + 
_fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3) +{ +_fjsp_v2r8 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10; + t1 = _fjsp_load_v2r8(ptrA); + t2 = _fjsp_load_v2r8(ptrB); + t3 = _fjsp_load_v2r8(ptrA+2); + t4 = _fjsp_load_v2r8(ptrB+2); + t5 = _fjsp_load_v2r8(ptrA+4); + t6 = _fjsp_load_v2r8(ptrB+4); + t7 = _fjsp_load_v2r8(ptrA+6); + t8 = _fjsp_load_v2r8(ptrB+6); + t9 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA+8); + t10 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrB+8); + GMX_FJSP_TRANSPOSE2_V2R8(t1,t2); + GMX_FJSP_TRANSPOSE2_V2R8(t3,t4); + GMX_FJSP_TRANSPOSE2_V2R8(t5,t6); + GMX_FJSP_TRANSPOSE2_V2R8(t7,t8); + *x1 = t1; + *y1 = t2; + *z1 = t3; + *x2 = t4; + *y2 = t5; + *z2 = t6; + *x3 = t7; + *y3 = t8; + *z3 = _fjsp_unpacklo_v2r8(t9,t10); +} + + +static gmx_inline void +gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA, const double * gmx_restrict ptrB, + _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, + _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, + _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3, + _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4) +{ + _fjsp_v2r8 t1,t2,t3,t4,t5,t6; + t1 = _fjsp_load_v2r8(ptrA); + t2 = _fjsp_load_v2r8(ptrB); + t3 = _fjsp_load_v2r8(ptrA+2); + t4 = _fjsp_load_v2r8(ptrB+2); + t5 = _fjsp_load_v2r8(ptrA+4); + t6 = _fjsp_load_v2r8(ptrB+4); + GMX_FJSP_TRANSPOSE2_V2R8(t1,t2); + GMX_FJSP_TRANSPOSE2_V2R8(t3,t4); + GMX_FJSP_TRANSPOSE2_V2R8(t5,t6); + *x1 = t1; + *y1 = t2; + *z1 = t3; + *x2 = t4; + *y2 = t5; + *z2 = t6; + t1 = _fjsp_load_v2r8(ptrA+6); + t2 = _fjsp_load_v2r8(ptrB+6); + t3 = _fjsp_load_v2r8(ptrA+8); + t4 = _fjsp_load_v2r8(ptrB+8); + t5 = _fjsp_load_v2r8(ptrA+10); + t6 = _fjsp_load_v2r8(ptrB+10); + GMX_FJSP_TRANSPOSE2_V2R8(t1,t2); + GMX_FJSP_TRANSPOSE2_V2R8(t3,t4); + GMX_FJSP_TRANSPOSE2_V2R8(t5,t6); + *x3 = t1; + *y3 = t2; 
+ *z3 = t3; + *x4 = t4; + *y4 = t5; + *z4 = t6; +} + + +static void +gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA, + _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1) +{ + _fjsp_v2r8 t1,t2,t3; + + t1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA); + t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA+1); + t3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA+2); + + t1 = _fjsp_sub_v2r8(t1,x1); + t2 = _fjsp_sub_v2r8(t2,y1); + t3 = _fjsp_sub_v2r8(t3,z1); + _fjsp_storel_v2r8(ptrA,t1); + _fjsp_storel_v2r8(ptrA+1,t2); + _fjsp_storel_v2r8(ptrA+2,t3); +} + +static void +gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 fscal, + _fjsp_v2r8 dx1, _fjsp_v2r8 dy1, _fjsp_v2r8 dz1) +{ + _fjsp_v2r8 t1,t2,t3; + + t1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA); + t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA+1); + t3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA+2); + + t1 = _fjsp_nmsub_v2r8(fscal,dx1,t1); + t2 = _fjsp_nmsub_v2r8(fscal,dy1,t2); + t3 = _fjsp_nmsub_v2r8(fscal,dz1,t3); + _fjsp_storel_v2r8(ptrA,t1); + _fjsp_storel_v2r8(ptrA+1,t2); + _fjsp_storel_v2r8(ptrA+2,t3); +} + + +static void +gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA, + _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1, + _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2, + _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3) +{ + _fjsp_v2r8 t1,t2,t3,t4,t5; + + t1 = _fjsp_load_v2r8(ptrA); + t2 = _fjsp_load_v2r8(ptrA+2); + t3 = _fjsp_load_v2r8(ptrA+4); + t4 = _fjsp_load_v2r8(ptrA+6); + t5 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA+8); + + x1 = _fjsp_unpacklo_v2r8(x1,y1); + z1 = _fjsp_unpacklo_v2r8(z1,x2); + y2 = _fjsp_unpacklo_v2r8(y2,z2); + x3 = _fjsp_unpacklo_v2r8(x3,y3); + /* nothing to be done for z3 */ + + t1 = _fjsp_sub_v2r8(t1,x1); + t2 = _fjsp_sub_v2r8(t2,z1); + t3 = _fjsp_sub_v2r8(t3,y2); + t4 = _fjsp_sub_v2r8(t4,x3); + t5 = _fjsp_sub_v2r8(t5,z3); + _fjsp_storel_v2r8(ptrA,t1); + _fjsp_storeh_v2r8(ptrA+1,t1); + _fjsp_storel_v2r8(ptrA+2,t2); + 
_fjsp_storeh_v2r8(ptrA+3,t2); + _fjsp_storel_v2r8(ptrA+4,t3); + _fjsp_storeh_v2r8(ptrA+5,t3); + _fjsp_storel_v2r8(ptrA+6,t4); + _fjsp_storeh_v2r8(ptrA+7,t4); + _fjsp_storel_v2r8(ptrA+8,t5); +} + + +static void +gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA, + _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1, + _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2, + _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3, + _fjsp_v2r8 x4, _fjsp_v2r8 y4, _fjsp_v2r8 z4) +{ + _fjsp_v2r8 t1,t2,t3,t4,t5,t6; + + t1 = _fjsp_load_v2r8(ptrA); + t2 = _fjsp_load_v2r8(ptrA+2); + t3 = _fjsp_load_v2r8(ptrA+4); + t4 = _fjsp_load_v2r8(ptrA+6); + t5 = _fjsp_load_v2r8(ptrA+8); + t6 = _fjsp_load_v2r8(ptrA+10); + + x1 = _fjsp_unpacklo_v2r8(x1,y1); + z1 = _fjsp_unpacklo_v2r8(z1,x2); + y2 = _fjsp_unpacklo_v2r8(y2,z2); + x3 = _fjsp_unpacklo_v2r8(x3,y3); + z3 = _fjsp_unpacklo_v2r8(z3,x4); + y4 = _fjsp_unpacklo_v2r8(y4,z4); + + _fjsp_storel_v2r8(ptrA, _fjsp_sub_v2r8( t1,x1 )); + _fjsp_storeh_v2r8(ptrA+1, _fjsp_sub_v2r8( t1,x1 )); + _fjsp_storel_v2r8(ptrA+2, _fjsp_sub_v2r8( t2,z1 )); + _fjsp_storeh_v2r8(ptrA+3, _fjsp_sub_v2r8( t2,z1 )); + _fjsp_storel_v2r8(ptrA+4, _fjsp_sub_v2r8( t3,y2 )); + _fjsp_storeh_v2r8(ptrA+5, _fjsp_sub_v2r8( t3,y2 )); + _fjsp_storel_v2r8(ptrA+6, _fjsp_sub_v2r8( t4,x3 )); + _fjsp_storeh_v2r8(ptrA+7, _fjsp_sub_v2r8( t4,x3 )); + _fjsp_storel_v2r8(ptrA+8, _fjsp_sub_v2r8( t5,z3 )); + _fjsp_storeh_v2r8(ptrA+9, _fjsp_sub_v2r8( t5,z3 )); + _fjsp_storel_v2r8(ptrA+10, _fjsp_sub_v2r8( t6,y4 )); + _fjsp_storeh_v2r8(ptrA+11, _fjsp_sub_v2r8( t6,y4 )); +} + +static void +gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB, + _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1) +{ + _fjsp_v2r8 t1,t2,t3,t4,t5,t6,t7; + + t1 = _fjsp_load_v2r8(ptrA); + t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA+2); + t3 = _fjsp_load_v2r8(ptrB); + t4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrB+2); + + t5 = _fjsp_unpacklo_v2r8(x1,y1); + t6 = 
_fjsp_unpackhi_v2r8(x1,y1); + t7 = _fjsp_unpackhi_v2r8(z1,z1); + + t1 = _fjsp_sub_v2r8(t1,t5); + t2 = _fjsp_sub_v2r8(t2,z1); + + t3 = _fjsp_sub_v2r8(t3,t6); + t4 = _fjsp_sub_v2r8(t4,t7); + + _fjsp_storel_v2r8(ptrA,t1); + _fjsp_storeh_v2r8(ptrA+1,t1); + _fjsp_storel_v2r8(ptrA+2,t2); + _fjsp_storel_v2r8(ptrB,t3); + _fjsp_storeh_v2r8(ptrB+1,t3); + _fjsp_storel_v2r8(ptrB+2,t4); +} + + +static void +gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB, + _fjsp_v2r8 fscal, _fjsp_v2r8 dx1, _fjsp_v2r8 dy1, _fjsp_v2r8 dz1) +{ + _fjsp_v2r8 t1,t2,t3,t4,t5,t6,t7,fscalA,fscalB; + + t1 = _fjsp_load_v2r8(ptrA); + t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA+2); + t3 = _fjsp_load_v2r8(ptrB); + t4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrB+2); + fscalA = _fjsp_unpacklo_v2r8(fscal,fscal); + fscalB = _fjsp_unpackhi_v2r8(fscal,fscal); + + t5 = _fjsp_unpacklo_v2r8(dx1,dy1); + t6 = _fjsp_unpackhi_v2r8(dx1,dy1); + t7 = _fjsp_unpackhi_v2r8(dz1,dz1); + + t1 = _fjsp_nmsub_v2r8(fscalA,t5,t1); + t2 = _fjsp_nmsub_v2r8(fscalA,dz1,t2); + + t3 = _fjsp_nmsub_v2r8(fscalB,t6,t3); + t4 = _fjsp_nmsub_v2r8(fscalB,t7,t4); + + _fjsp_storel_v2r8(ptrA,t1); + _fjsp_storeh_v2r8(ptrA+1,t1); + _fjsp_storel_v2r8(ptrA+2,t2); + _fjsp_storel_v2r8(ptrB,t3); + _fjsp_storeh_v2r8(ptrB+1,t3); + _fjsp_storel_v2r8(ptrB+2,t4); +} + + +static void +gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB, + _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1, + _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2, + _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3) +{ + _fjsp_v2r8 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10; + _fjsp_v2r8 tA,tB,tC,tD,tE,tF,tG,tH,tI; + + t1 = _fjsp_load_v2r8(ptrA); + t2 = _fjsp_load_v2r8(ptrA+2); + t3 = _fjsp_load_v2r8(ptrA+4); + t4 = _fjsp_load_v2r8(ptrA+6); + t5 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA+8); + t6 = _fjsp_load_v2r8(ptrB); + t7 = _fjsp_load_v2r8(ptrB+2); + t8 = _fjsp_load_v2r8(ptrB+4); + t9 = 
_fjsp_load_v2r8(ptrB+6); + t10 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrB+8); + + tA = _fjsp_unpacklo_v2r8(x1,y1); + tB = _fjsp_unpackhi_v2r8(x1,y1); + tC = _fjsp_unpacklo_v2r8(z1,x2); + tD = _fjsp_unpackhi_v2r8(z1,x2); + tE = _fjsp_unpacklo_v2r8(y2,z2); + tF = _fjsp_unpackhi_v2r8(y2,z2); + tG = _fjsp_unpacklo_v2r8(x3,y3); + tH = _fjsp_unpackhi_v2r8(x3,y3); + tI = _fjsp_unpackhi_v2r8(z3,z3); + + t1 = _fjsp_sub_v2r8(t1,tA); + t2 = _fjsp_sub_v2r8(t2,tC); + t3 = _fjsp_sub_v2r8(t3,tE); + t4 = _fjsp_sub_v2r8(t4,tG); + t5 = _fjsp_sub_v2r8(t5,z3); + + t6 = _fjsp_sub_v2r8(t6,tB); + t7 = _fjsp_sub_v2r8(t7,tD); + t8 = _fjsp_sub_v2r8(t8,tF); + t9 = _fjsp_sub_v2r8(t9,tH); + t10 = _fjsp_sub_v2r8(t10,tI); + + _fjsp_storel_v2r8(ptrA,t1); + _fjsp_storeh_v2r8(ptrA+1,t1); + _fjsp_storel_v2r8(ptrA+2,t2); + _fjsp_storeh_v2r8(ptrA+3,t2); + _fjsp_storel_v2r8(ptrA+4,t3); + _fjsp_storeh_v2r8(ptrA+5,t3); + _fjsp_storel_v2r8(ptrA+6,t4); + _fjsp_storeh_v2r8(ptrA+7,t4); + _fjsp_storel_v2r8(ptrA+8,t5); + _fjsp_storel_v2r8(ptrB,t6); + _fjsp_storeh_v2r8(ptrB+1,t6); + _fjsp_storel_v2r8(ptrB+2,t7); + _fjsp_storeh_v2r8(ptrB+3,t7); + _fjsp_storel_v2r8(ptrB+4,t8); + _fjsp_storeh_v2r8(ptrB+5,t8); + _fjsp_storel_v2r8(ptrB+6,t9); + _fjsp_storeh_v2r8(ptrB+7,t9); + _fjsp_storel_v2r8(ptrB+8,t10); +} + + +static void +gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB, + _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1, + _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2, + _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3, + _fjsp_v2r8 x4, _fjsp_v2r8 y4, _fjsp_v2r8 z4) +{ + _fjsp_v2r8 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12; + _fjsp_v2r8 tA,tB,tC,tD,tE,tF,tG,tH,tI,tJ,tK,tL; + + t1 = _fjsp_load_v2r8(ptrA); + t2 = _fjsp_load_v2r8(ptrA+2); + t3 = _fjsp_load_v2r8(ptrA+4); + t4 = _fjsp_load_v2r8(ptrA+6); + t5 = _fjsp_load_v2r8(ptrA+8); + t6 = _fjsp_load_v2r8(ptrA+10); + t7 = _fjsp_load_v2r8(ptrB); + t8 = _fjsp_load_v2r8(ptrB+2); + t9 = _fjsp_load_v2r8(ptrB+4); + t10 = 
_fjsp_load_v2r8(ptrB+6); + t11 = _fjsp_load_v2r8(ptrB+8); + t12 = _fjsp_load_v2r8(ptrB+10); + + tA = _fjsp_unpacklo_v2r8(x1,y1); + tB = _fjsp_unpackhi_v2r8(x1,y1); + tC = _fjsp_unpacklo_v2r8(z1,x2); + tD = _fjsp_unpackhi_v2r8(z1,x2); + tE = _fjsp_unpacklo_v2r8(y2,z2); + tF = _fjsp_unpackhi_v2r8(y2,z2); + tG = _fjsp_unpacklo_v2r8(x3,y3); + tH = _fjsp_unpackhi_v2r8(x3,y3); + tI = _fjsp_unpacklo_v2r8(z3,x4); + tJ = _fjsp_unpackhi_v2r8(z3,x4); + tK = _fjsp_unpacklo_v2r8(y4,z4); + tL = _fjsp_unpackhi_v2r8(y4,z4); + + t1 = _fjsp_sub_v2r8(t1,tA); + t2 = _fjsp_sub_v2r8(t2,tC); + t3 = _fjsp_sub_v2r8(t3,tE); + t4 = _fjsp_sub_v2r8(t4,tG); + t5 = _fjsp_sub_v2r8(t5,tI); + t6 = _fjsp_sub_v2r8(t6,tK); + + t7 = _fjsp_sub_v2r8(t7,tB); + t8 = _fjsp_sub_v2r8(t8,tD); + t9 = _fjsp_sub_v2r8(t9,tF); + t10 = _fjsp_sub_v2r8(t10,tH); + t11 = _fjsp_sub_v2r8(t11,tJ); + t12 = _fjsp_sub_v2r8(t12,tL); + + _fjsp_storel_v2r8(ptrA, t1); + _fjsp_storeh_v2r8(ptrA+1,t1); + _fjsp_storel_v2r8(ptrA+2,t2); + _fjsp_storeh_v2r8(ptrA+3,t2); + _fjsp_storel_v2r8(ptrA+4,t3); + _fjsp_storeh_v2r8(ptrA+5,t3); + _fjsp_storel_v2r8(ptrA+6,t4); + _fjsp_storeh_v2r8(ptrA+7,t4); + _fjsp_storel_v2r8(ptrA+8,t5); + _fjsp_storeh_v2r8(ptrA+9,t5); + _fjsp_storel_v2r8(ptrA+10,t6); + _fjsp_storeh_v2r8(ptrA+11,t6); + _fjsp_storel_v2r8(ptrB, t7); + _fjsp_storeh_v2r8(ptrB+1,t7); + _fjsp_storel_v2r8(ptrB+2,t8); + _fjsp_storeh_v2r8(ptrB+3,t8); + _fjsp_storel_v2r8(ptrB+4,t9); + _fjsp_storeh_v2r8(ptrB+5,t9); + _fjsp_storel_v2r8(ptrB+6,t10); + _fjsp_storeh_v2r8(ptrB+7,t10); + _fjsp_storel_v2r8(ptrB+8,t11); + _fjsp_storeh_v2r8(ptrB+9,t11); + _fjsp_storel_v2r8(ptrB+10,t12); + _fjsp_storeh_v2r8(ptrB+11,t12); +} + + + +static gmx_inline void +gmx_fjsp_update_iforce_1atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1, + double * gmx_restrict fptr, + double * gmx_restrict fshiftptr) +{ /* Sums the two SIMD lanes of fix1/fiy1/fiz1 and adds the three totals to fptr[0..2] and to fshiftptr[0..2] */ + _fjsp_v2r8 t1,t4; /* same vector type as the arguments, as in the rest of this header */ + + /* transpose data */ + t1 = fix1; + fix1 = _fjsp_unpacklo_v2r8(fix1,fiy1); /* y0 x0 */ + fiy1 = 
_fjsp_unpackhi_v2r8(t1,fiy1); /* y1 x1 */ + + fix1 = _fjsp_add_v2r8(fix1,fiy1); + fiz1 = _fjsp_add_v2r8( fiz1, _fjsp_unpackhi_v2r8(fiz1,fiz1 )); + + t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 ); + _fjsp_storel_v2r8( fptr, t4 ); + _fjsp_storeh_v2r8( fptr+1, t4 ); + _fjsp_storel_v2r8( fptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),fptr+2), fiz1 )); + + t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 ); + _fjsp_storel_v2r8( fshiftptr, t4 ); + _fjsp_storeh_v2r8( fshiftptr+1, t4 ); + _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),fshiftptr+2), fiz1 )); +} + +static gmx_inline void +gmx_fjsp_update_iforce_3atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1, + _fjsp_v2r8 fix2, _fjsp_v2r8 fiy2, _fjsp_v2r8 fiz2, + _fjsp_v2r8 fix3, _fjsp_v2r8 fiy3, _fjsp_v2r8 fiz3, + double * gmx_restrict fptr, + double * gmx_restrict fshiftptr) +{ /* Adds the lane-summed forces of three atoms to fptr[0..8] and their per-axis totals to fshiftptr[0..2] */ + _fjsp_v2r8 t1,t2,t3,t4,t5,t6; /* same vector type as the arguments, as in the rest of this header */ + + /* transpose data */ + GMX_FJSP_TRANSPOSE2_V2R8(fix1,fiy1); + GMX_FJSP_TRANSPOSE2_V2R8(fiz1,fix2); + GMX_FJSP_TRANSPOSE2_V2R8(fiy2,fiz2); + t1 = fix3; + fix3 = _fjsp_unpacklo_v2r8(fix3,fiy3); /* y0 x0 */ + fiy3 = _fjsp_unpackhi_v2r8(t1,fiy3); /* y1 x1 */ + + fix1 = _fjsp_add_v2r8(fix1,fiy1); + fiz1 = _fjsp_add_v2r8(fiz1,fix2); + fiy2 = _fjsp_add_v2r8(fiy2,fiz2); + + fix3 = _fjsp_add_v2r8(fix3,fiy3); + fiz3 = _fjsp_add_v2r8( fiz3, _fjsp_unpackhi_v2r8(fiz3,fiz3)); + + t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 ); + t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+2), fiz1 ); + t5 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+4), fiy2 ); + t6 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+6), fix3 ); + + _fjsp_storel_v2r8( fptr, t3 ); + _fjsp_storeh_v2r8( fptr+1, t3 ); + _fjsp_storel_v2r8( fptr+2, t4 ); + _fjsp_storeh_v2r8( fptr+3, t4 ); + _fjsp_storel_v2r8( fptr+4, t5 ); + _fjsp_storeh_v2r8( fptr+5, t5 ); + _fjsp_storel_v2r8( fptr+6, t6 ); + _fjsp_storeh_v2r8( fptr+7, t6 ); + _fjsp_storel_v2r8( fptr+8, _fjsp_add_v2r8( 
_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),fptr+8), fiz3 )); + + fix1 = _fjsp_add_v2r8(fix1,fix3); + t1 = _fjsp_shuffle_v2r8(fiz1,fiy2,GMX_FJSP_SHUFFLE2(0,1)); + fix1 = _fjsp_add_v2r8(fix1,t1); /* x and y sums */ + + t2 = _fjsp_shuffle_v2r8(fiy2,fiy2,GMX_FJSP_SHUFFLE2(1,1)); + fiz1 = _fjsp_add_v2r8(fiz1,fiz3); + fiz1 = _fjsp_add_v2r8(fiz1,t2); /* z sum */ + + t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 ); + _fjsp_storel_v2r8( fshiftptr, t3 ); + _fjsp_storeh_v2r8( fshiftptr+1, t3 ); + _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),fshiftptr+2), fiz1 )); +} + + +static gmx_inline void +gmx_fjsp_update_iforce_4atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1, + _fjsp_v2r8 fix2, _fjsp_v2r8 fiy2, _fjsp_v2r8 fiz2, + _fjsp_v2r8 fix3, _fjsp_v2r8 fiy3, _fjsp_v2r8 fiz3, + _fjsp_v2r8 fix4, _fjsp_v2r8 fiy4, _fjsp_v2r8 fiz4, + double * gmx_restrict fptr, + double * gmx_restrict fshiftptr) +{ /* Adds the lane-summed forces of four atoms to fptr[0..11] and their per-axis totals to fshiftptr[0..2] */ + _fjsp_v2r8 t1,t2,t3,t4,t5,t6,t7,t8; /* same vector type as the arguments, as in the rest of this header */ + + /* transpose data */ + GMX_FJSP_TRANSPOSE2_V2R8(fix1,fiy1); + GMX_FJSP_TRANSPOSE2_V2R8(fiz1,fix2); + GMX_FJSP_TRANSPOSE2_V2R8(fiy2,fiz2); + GMX_FJSP_TRANSPOSE2_V2R8(fix3,fiy3); + GMX_FJSP_TRANSPOSE2_V2R8(fiz3,fix4); + GMX_FJSP_TRANSPOSE2_V2R8(fiy4,fiz4); + + fix1 = _fjsp_add_v2r8(fix1,fiy1); + fiz1 = _fjsp_add_v2r8(fiz1,fix2); + fiy2 = _fjsp_add_v2r8(fiy2,fiz2); + fix3 = _fjsp_add_v2r8(fix3,fiy3); + fiz3 = _fjsp_add_v2r8(fiz3,fix4); + fiy4 = _fjsp_add_v2r8(fiy4,fiz4); + + t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 ); + t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+2), fiz1 ); + t5 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+4), fiy2 ); + t6 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+6), fix3 ); + t7 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+8), fiz3 ); + t8 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+10), fiy4 ); + _fjsp_storel_v2r8( fptr, t3 ); + _fjsp_storeh_v2r8( fptr+1, t3 ); + _fjsp_storel_v2r8( fptr+2, t4 ); + _fjsp_storeh_v2r8( fptr+3, t4 ); + _fjsp_storel_v2r8( fptr+4, t5 ); + _fjsp_storeh_v2r8( 
fptr+5, t5 ); + _fjsp_storel_v2r8( fptr+6, t6 ); + _fjsp_storeh_v2r8( fptr+7, t6 ); + _fjsp_storel_v2r8( fptr+8, t7 ); + _fjsp_storeh_v2r8( fptr+9, t7 ); + _fjsp_storel_v2r8( fptr+10, t8 ); + _fjsp_storeh_v2r8( fptr+11, t8 ); + + t1 = _fjsp_shuffle_v2r8(fiz1,fiy2,GMX_FJSP_SHUFFLE2(0,1)); + fix1 = _fjsp_add_v2r8(fix1,t1); + t2 = _fjsp_shuffle_v2r8(fiz3,fiy4,GMX_FJSP_SHUFFLE2(0,1)); + fix3 = _fjsp_add_v2r8(fix3,t2); + fix1 = _fjsp_add_v2r8(fix1,fix3); /* x and y sums */ + + fiz1 = _fjsp_add_v2r8(fiz1, _fjsp_unpackhi_v2r8(fiy2,fiy2)); + fiz3 = _fjsp_add_v2r8(fiz3, _fjsp_unpackhi_v2r8(fiy4,fiy4)); + fiz1 = _fjsp_add_v2r8(fiz1,fiz3); /* z sum */ + + t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 ); + _fjsp_storel_v2r8( fshiftptr, t3 ); + _fjsp_storeh_v2r8( fshiftptr+1, t3 ); + _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),fshiftptr+2), fiz1 )); +} + + + +static gmx_inline void +gmx_fjsp_update_1pot_v2r8(_fjsp_v2r8 pot1, double * gmx_restrict ptrA) +{ + pot1 = _fjsp_add_v2r8(pot1, _fjsp_unpackhi_v2r8(pot1,pot1)); + _fjsp_storel_v2r8(ptrA,_fjsp_add_v2r8(pot1,_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA))); +} + +static gmx_inline void +gmx_fjsp_update_2pot_v2r8(_fjsp_v2r8 pot1, double * gmx_restrict ptrA, + _fjsp_v2r8 pot2, double * gmx_restrict ptrB) +{ + GMX_FJSP_TRANSPOSE2_V2R8(pot1,pot2); + pot1 = _fjsp_add_v2r8(pot1,pot2); + pot2 = _fjsp_unpackhi_v2r8(pot1,pot1); + + _fjsp_storel_v2r8(ptrA,_fjsp_add_v2r8(pot1,_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrA))); + _fjsp_storel_v2r8(ptrB,_fjsp_add_v2r8(pot2,_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),ptrB))); +} + + +#endif /* _kernelutil_sparc64_hpc_ace_double_h_ */ diff --git a/src/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py b/src/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py new file mode 100755 index 0000000000..9b723bda3a --- /dev/null +++ 
b/src/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py @@ -0,0 +1,538 @@ +#!/usr/bin/python +# +# This file is part of the GROMACS molecular simulation package. +# +# Copyright (c) 2012, by the GROMACS development team, led by +# David van der Spoel, Berk Hess, Erik Lindahl, and including many +# others, as listed in the AUTHORS file in the top-level source +# directory and at http://www.gromacs.org. +# +# GROMACS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 +# of the License, or (at your option) any later version. +# +# GROMACS is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with GROMACS; if not, see +# http://www.gnu.org/licenses, or write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# If you want to redistribute modifications to GROMACS, please +# consider that scientific software is very special. Version +# control is crucial - bugs must be traceable. We will be happy to +# consider code for inclusion in the official distribution, but +# derived work must not be called official GROMACS. Details are found +# in the README & COPYING files - if they are missing, get the +# official version at http://www.gromacs.org. +# +# To help us fund GROMACS development, we humbly ask that you cite +# the research papers on the package. Check out http://www.gromacs.org + +import sys +import os +sys.path.append ( "../preprocessor" ) +from gmxpreprocess import gmxpreprocess + +# "The happiest programs are programs that write other programs." 
+# +# +# This script controls the generation of Gromacs nonbonded kernels. +# +# We no longer generate kernels on-the-fly, so this file is not run +# during a Gromacs compile - only when we need to update the kernels (=rarely). +# +# To maximize performance, each combination of interactions in Gromacs +# has a separate nonbonded kernel without conditionals in the code. +# To avoid writing hundreds of different routines for each architecture, +# we instead use a custom preprocessor so we can encode the conditionals +# and expand for-loops (e.g, for water-water interactions) +# from a general kernel template. While that file will contain quite a +# few preprocessor directives, it is still an order of magnitude easier +# to maintain than ~200 different kernels (not to mention it avoids bugs). +# +# To actually generate the kernels, this program iteratively calls the +# preprocessor with different define settings corresponding to all +# combinations of coulomb/van-der-Waals/geometry options. +# +# A main goal in the design was to make this new generator _general_. For +# this reason we have used a lot of different fields to identify a particular +# kernel and interaction. Basically, each kernel will have a name like +# +# nbkernel_ElecXX_VdwYY_GeomZZ_VF_QQ() +# +# Where XX/YY/ZZ/VF are strings to identify what the kernel computes. +# +# Elec/Vdw describe the type of interaction for electrostatics and van der Waals. +# The geometry settings correspond e.g. to water-water or water-particle kernels, +# and finally the VF setting is V,F,or VF depending on whether we calculate +# only the potential, only the force, or both of them. The final string (QQ) +# is the architecture/language/optimization of the kernel. +# +Arch = 'sparc64_hpc_ace_double' + +# Explanation of the 'properties': +# +# It is cheap to compute r^2, and the kernels require various other functions of r for +# different kinds of interaction. 
Depending on the needs of the kernel and the available +# processor instructions, this will be done in different ways. +# +# 'rinv' means we need 1/r, which is calculated as 1/sqrt(r^2). +# 'rinvsq' means we need 1/(r*r). This is calculated as rinv*rinv if we already did rinv, otherwise 1/r^2. +# 'r' is similarly calculated as r^2*rinv when needed +# 'table' means the interaction is tabulated, in which case we will calculate a table index before the interaction +# 'shift' means the interaction will be modified by a constant to make it zero at the cutoff. +# 'cutoff' means the interaction is set to 0.0 outside the cutoff +# + +FileHeader = \ +'/*\n' \ +' * This file is part of the GROMACS molecular simulation package.\n' \ +' *\n' \ +' * Copyright (c) 2012, by the GROMACS development team, led by\n' \ +' * David van der Spoel, Berk Hess, Erik Lindahl, and including many\n' \ +' * others, as listed in the AUTHORS file in the top-level source\n' \ +' * directory and at http://www.gromacs.org.\n' \ +' *\n' \ +' * GROMACS is free software; you can redistribute it and/or\n' \ +' * modify it under the terms of the GNU Lesser General Public License\n' \ +' * as published by the Free Software Foundation; either version 2.1\n' \ +' * of the License, or (at your option) any later version.\n' \ +' *\n' \ +' * GROMACS is distributed in the hope that it will be useful,\n' \ +' * but WITHOUT ANY WARRANTY; without even the implied warranty of\n' \ +' * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU\n' \ +' * Lesser General Public License for more details.\n' \ +' *\n' \ +' * You should have received a copy of the GNU Lesser General Public\n' \ +' * License along with GROMACS; if not, see\n' \ +' * http://www.gnu.org/licenses, or write to the Free Software Foundation,\n' \ +' * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n' \ +' *\n' \ +' * If you want to redistribute modifications to GROMACS, please\n' \ +' * consider that scientific software is very special. Version\n' \ +' * control is crucial - bugs must be traceable. We will be happy to\n' \ +' * consider code for inclusion in the official distribution, but\n' \ +' * derived work must not be called official GROMACS. Details are found\n' \ +' * in the README & COPYING files - if they are missing, get the\n' \ +' * official version at http://www.gromacs.org.\n' \ +' *\n' \ +' * To help us fund GROMACS development, we humbly ask that you cite\n' \ +' * the research papers on the package. Check out http://www.gromacs.org.\n' \ +' */\n' \ +'/*\n' \ +' * Note: this file was generated by the GROMACS '+Arch+' kernel generator.\n' \ +' */\n' + +############################################### +# ELECTROSTATICS +# Interactions and flags for them +############################################### +ElectrostaticsList = { + 'None' : [], + 'Coulomb' : ['rinv','rinvsq'], + 'ReactionField' : ['rinv','rinvsq'], + 'GeneralizedBorn' : ['rinv','r'], + 'CubicSplineTable' : ['rinv','r','table'], + 'Ewald' : ['rinv','rinvsq','r'], +} + + +############################################### +# VAN DER WAALS +# Interactions and flags for them +############################################### +VdwList = { + 'None' : [], + 'LennardJones' : ['rinvsq'], +# 'Buckingham' : ['rinv','rinvsq','r'], # Disabled here to reduce the number of kernels and simplify the template + 'CubicSplineTable' : ['rinv','r','table'], +} + + +############################################### +# MODIFIERS +# Different ways to 
adjust/modify interactions to conserve energy +############################################### +ModifierList = { + 'None' : [], + 'ExactCutoff' : ['exactcutoff'], # Zero the interaction outside the cutoff, used for reaction-field-zero + 'PotentialShift' : ['shift','exactcutoff'], + 'PotentialSwitch' : ['rinv','r','switch','exactcutoff'] +} + + +############################################### +# GEOMETRY COMBINATIONS +############################################### +GeometryNameList = [ + [ 'Particle' , 'Particle' ], + [ 'Water3' , 'Particle' ], + [ 'Water3' , 'Water3' ], + [ 'Water4' , 'Particle' ], + [ 'Water4' , 'Water4' ] +] + + +############################################### +# POTENTIAL / FORCE +############################################### +VFList = [ + 'PotentialAndForce', +# 'Potential', # Not used yet + 'Force' +] + + +############################################### +# GEOMETRY PROPERTIES +############################################### +# Dictionaries with lists telling which interactions are present +# 1,2,3 means particles 1,2,3 (but not 0) have electrostatics! 
+GeometryElectrostatics = { + 'Particle' : [ 0 ], + 'Particle2' : [ 0 , 1 ], + 'Particle3' : [ 0 , 1 , 2 ], + 'Particle4' : [ 0 , 1 , 2 , 3 ], + 'Water3' : [ 0 , 1 , 2 ], + 'Water4' : [ 1 , 2 , 3 ] +} + +GeometryVdw = { + 'Particle' : [ 0 ], + 'Particle2' : [ 0 , 1 ], + 'Particle3' : [ 0 , 1 , 2 ], + 'Particle4' : [ 0 , 1 , 2 , 3 ], + 'Water3' : [ 0 ], + 'Water4' : [ 0 ] +} + + + + +# Dictionary to abbreviate all strings (mixed from all the lists) +Abbreviation = { + 'None' : 'None', + 'Coulomb' : 'Coul', + 'Ewald' : 'Ew', + 'ReactionField' : 'RF', + 'GeneralizedBorn' : 'GB', + 'CubicSplineTable' : 'CSTab', + 'LennardJones' : 'LJ', + 'Buckingham' : 'Bham', + 'PotentialShift' : 'Sh', + 'PotentialSwitch' : 'Sw', + 'ExactCutoff' : 'Cut', + 'PotentialAndForce' : 'VF', + 'Potential' : 'V', + 'Force' : 'F', + 'Water3' : 'W3', + 'Water4' : 'W4', + 'Particle' : 'P1', + 'Particle2' : 'P2', + 'Particle3' : 'P3', + 'Particle4' : 'P4' +} + + +############################################### +# Functions +############################################### + +# Return a string with the kernel name from current settings +def MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom): + ElecStr = 'Elec' + Abbreviation[KernelElec] + if(KernelElecMod!='None'): + ElecStr = ElecStr + Abbreviation[KernelElecMod] + VdwStr = 'Vdw' + Abbreviation[KernelVdw] + if(KernelVdwMod!='None'): + VdwStr = VdwStr + Abbreviation[KernelVdwMod] + GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]] + return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + Arch + +def MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF): + ElecStr = 'Elec' + Abbreviation[KernelElec] + if(KernelElecMod!='None'): + ElecStr = ElecStr + Abbreviation[KernelElecMod] + VdwStr = 'Vdw' + Abbreviation[KernelVdw] + if(KernelVdwMod!='None'): + VdwStr = VdwStr + Abbreviation[KernelVdwMod] + GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + 
Abbreviation[KernelGeom[1]] + VFStr = Abbreviation[KernelVF] + return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + VFStr + '_' + Arch + +# Return a string with a declaration to use for the kernel; +# this will be a sequence of string combinations as well as the actual function name +# Dont worry about field widths - that is just pretty-printing for the header! +def MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF): + KernelStr = '\"'+KernelName+'\"' + ArchStr = '\"'+Arch+'\"' + ElecStr = '\"'+KernelElec+'\"' + ElecModStr = '\"'+KernelElecMod+'\"' + VdwStr = '\"'+KernelVdw+'\"' + VdwModStr = '\"'+KernelVdwMod+'\"' + GeomStr = '\"'+KernelGeom[0]+KernelGeom[1]+'\"' + OtherStr = '\"'+KernelOther+'\"' + VFStr = '\"'+KernelVF+'\"' + + ThisSpec = ArchStr+', '+ElecStr+', '+ElecModStr+', '+VdwStr+', '+VdwModStr+', '+GeomStr+', '+OtherStr+', '+VFStr + ThisDecl = ' { '+KernelName+', '+KernelStr+', '+ThisSpec+' }' + return ThisDecl + + +# Returns 1 if this kernel should be created, 0 if we should skip it +# This routine is not critical - it is not the end of the world if we create more kernels, +# but since the number is pretty large we save both space and compile-time by reducing it a bit. +def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF): + + # No need for kernels without interactions + if(KernelElec=='None' and KernelVdw=='None'): + return 0 + + # No need for modifiers without interactions + if((KernelElec=='None' and KernelElecMod!='None') or (KernelVdw=='None' and KernelVdwMod!='None')): + return 0 + + # No need for LJ-only water optimization, or water optimization with implicit solvent. 
+ if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)): + return 0 + + # Non-matching table settings are pointless + if( ('Table' in KernelElec) and ('Table' in KernelVdw) and KernelElec!=KernelVdw ): + return 0 + + # Try to reduce the number of different switch/shift options to get a reasonable number of kernels + # For electrostatics, reaction-field can use 'exactcutoff', and ewald can use switch or shift. + if(KernelElecMod=='ExactCutoff' and KernelElec!='ReactionField'): + return 0 + if(KernelElecMod in ['PotentialShift','PotentialSwitch'] and KernelElec!='Ewald'): + return 0 + # For Vdw, we support switch and shift for Lennard-Jones/Buckingham + if((KernelVdwMod=='ExactCutoff') or + (KernelVdwMod in ['PotentialShift','PotentialSwitch'] and KernelVdw not in ['LennardJones','Buckingham'])): + return 0 + + # Choose either switch or shift and don't mix them... + if((KernelElecMod=='PotentialShift' and KernelVdwMod=='PotentialSwitch') or + (KernelElecMod=='PotentialSwitch' and KernelVdwMod=='PotentialShift')): + return 0 + + # Don't use a Vdw kernel with a modifier if the electrostatics one does not have one + if(KernelElec!='None' and KernelElecMod=='None' and KernelVdwMod!='None'): + return 0 + + # Don't use an electrostatics kernel with a modifier if the vdw one does not have one, + # unless the electrostatics one is reaction-field with exact cutoff. + if(KernelVdw!='None' and KernelVdwMod=='None' and KernelElecMod!='None'): + if(KernelElec=='ReactionField' and KernelVdw!='CubicSplineTable'): + return 0 + elif(KernelElec!='ReactionField'): + return 0 + + return 1 + + + +# +# The preprocessor will automatically expand the interactions for water and other +# geometries inside the kernel, but to get this right we need to setup a couple +# of defines - we do them in a separate routine to keep the main loop clean. 
+# +# While this routine might look a bit complex it is actually quite straightforward, +# and the best news is that you won't have to modify _anything_ for a new geometry +# as long as you correctly define its Electrostatics/Vdw geometry in the lists above! +# +def SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines): + # What is the _name_ for the i/j group geometry? + igeometry = KernelGeom[0] + jgeometry = KernelGeom[1] + # define so we can access it in the source when the preprocessor runs + defines['GEOMETRY_I'] = igeometry + defines['GEOMETRY_J'] = jgeometry + + # For the i/j groups, extract a python list of which sites have electrostatics + # (lists of site indices: Water3 gives [0,1,2], while Water4, with no elec on the first site, gives [1,2,3]) + ielec = GeometryElectrostatics[igeometry] + jelec = GeometryElectrostatics[jgeometry] + # Zero out the corresponding lists in case we don't do Elec + if(KernelElec=='None'): + ielec = [] + jelec = [] + + # Extract similar interaction lists for Vdw interactions (Water3 and Water4 give [0]: only the first site) + iVdw = GeometryVdw[igeometry] + jVdw = GeometryVdw[jgeometry] + + # Zero out the corresponding lists in case we don't do Vdw + if(KernelVdw=='None'): + iVdw = [] + jVdw = [] + + # iany[] and jany[] contain the lists of the particles actually used (for interactions) in this kernel + iany = list(set(ielec+iVdw)) # convert to+from set to make elements unique + jany = list(set(jelec+jVdw)) + + defines['PARTICLES_ELEC_I'] = ielec + defines['PARTICLES_ELEC_J'] = jelec + defines['PARTICLES_VDW_I'] = iVdw + defines['PARTICLES_VDW_J'] = jVdw + defines['PARTICLES_I'] = iany + defines['PARTICLES_J'] = jany + + # elecij,Vdwij are lists of [i,j] pairs of particles for which the corresponding interaction is done + # (and anyij again corresponds to either electrostatics or Vdw) + elecij = [] + Vdwij = [] + anyij = [] + + for i in ielec: + for j in jelec: + elecij.append([i,j]) + + for i in iVdw: + for j in jVdw: + Vdwij.append([i,j]) + 
+ for i in iany: + for j in jany: + if [i,j] in elecij or [i,j] in Vdwij: + anyij.append([i,j]) + + defines['PAIRS_IJ'] = anyij + + # Make a 2d list-of-distance-properties-to-calculate for i,j + ni = max(iany)+1 + nj = max(jany)+1 + # Each element properties[i][j] is an empty list + properties = [ [ [] for j in range(0,nj) ] for i in range (0,ni) ] + # Add properties to each [i][j] entry + for i in range(0,ni): + for j in range(0,nj): + if [i,j] in elecij: + properties[i][j] = properties[i][j] + ['electrostatics'] + ElectrostaticsList[KernelElec] + ModifierList[KernelElecMod] + if [i,j] in Vdwij: + properties[i][j] = properties[i][j] + ['vdw'] + VdwList[KernelVdw] + ModifierList[KernelVdwMod] + # Add rinv if we need r + if 'r' in properties[i][j]: + properties[i][j] = properties[i][j] + ['rinv'] + # Add rsq if we need rinv or rinvsq + if 'rinv' in properties[i][j] or 'rinvsq' in properties[i][j]: + properties[i][j] = properties[i][j] + ['rsq'] + + defines['INTERACTION_FLAGS'] = properties + + + +def PrintStatistics(ratio): + ratio = 100.0*ratio + print '\rGenerating %s nonbonded kernels... 
%5.1f%%' % (Arch,ratio), + sys.stdout.flush() + + + +defines = {} +kerneldecl = [] + +cnt = 0.0 +nelec = len(ElectrostaticsList) +nVdw = len(VdwList) +nmod = len(ModifierList) +ngeom = len(GeometryNameList) + +ntot = nelec*nmod*nVdw*nmod*ngeom + +numKernels = 0 + +fpdecl = open('nb_kernel_' + Arch + '.c','w') +fpdecl.write( FileHeader ) +fpdecl.write( '#ifndef nb_kernel_' + Arch + '_h\n' ) +fpdecl.write( '#define nb_kernel_' + Arch + '_h\n\n' ) +fpdecl.write( '#include "../nb_kernel.h"\n\n' ) + +for KernelElec in ElectrostaticsList: + defines['KERNEL_ELEC'] = KernelElec + + for KernelElecMod in ModifierList: + defines['KERNEL_MOD_ELEC'] = KernelElecMod + + for KernelVdw in VdwList: + defines['KERNEL_VDW'] = KernelVdw + + for KernelVdwMod in ModifierList: + defines['KERNEL_MOD_VDW'] = KernelVdwMod + + for KernelGeom in GeometryNameList: + + cnt += 1 + KernelFilename = MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom) + '.c' + fpkernel = open(KernelFilename,'w') + defines['INCLUDE_HEADER'] = 1 # Include header first time in new file + DoHeader = 1 + + for KernelVF in VFList: + + KernelName = MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF) + + defines['KERNEL_NAME'] = KernelName + defines['KERNEL_VF'] = KernelVF + + # Check if this is a valid/sane/usable combination + if not KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF): + continue; + + # The overall kernel settings determine what the _kernel_ calculates, but for the water + # kernels this does not mean that every pairwise interaction has e.g. Vdw interactions. + # This routine sets defines of what to calculate for each pair of particles in those cases. 
+ SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines) + + if(DoHeader==1): + fpkernel.write( FileHeader ) + + gmxpreprocess('nb_kernel_template_' + Arch + '.pre', KernelName+'.tmp' , defines, force=1,contentType='C') + numKernels = numKernels + 1 + + defines['INCLUDE_HEADER'] = 0 # Header has been included once now + DoHeader=0 + + # Append temp file contents to the common kernelfile + fptmp = open(KernelName+'.tmp','r') + fpkernel.writelines(fptmp.readlines()) + fptmp.close() + os.remove(KernelName+'.tmp') + + # Add a declaration for this kernel + fpdecl.write('nb_kernel_t ' + KernelName + ';\n'); + + # Add declaration to the buffer + KernelOther='' + kerneldecl.append(MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF)) + + filesize = fpkernel.tell() + fpkernel.close() + if(filesize==0): + os.remove(KernelFilename) + + PrintStatistics(cnt/ntot) + pass + pass + pass + pass +pass + +# Write out the list of settings and corresponding kernels to the declaration file +fpdecl.write( '\n\n' ) +fpdecl.write( 'nb_kernel_info_t\n' ) +fpdecl.write( 'kernellist_'+Arch+'[] =\n' ) +fpdecl.write( '{\n' ) +for decl in kerneldecl[0:-1]: + fpdecl.write( decl + ',\n' ) +fpdecl.write( kerneldecl[-1] + '\n' ) +fpdecl.write( '};\n\n' ) +fpdecl.write( 'int\n' ) +fpdecl.write( 'kernellist_'+Arch+'_size = sizeof(kernellist_'+Arch+')/sizeof(kernellist_'+Arch+'[0]);\n\n') +fpdecl.write( '#endif\n') +fpdecl.close() diff --git a/src/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c b/src/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c new file mode 100644 index 0000000000..4b3773de53 --- /dev/null +++ b/src/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.c @@ -0,0 +1,711 @@ +/* + * This file is part of the GROMACS 
molecular simulation package. + * + * Copyright (c) 2012, by the GROMACS development team, led by + * David van der Spoel, Berk Hess, Erik Lindahl, and including many + * others, as listed in the AUTHORS file in the top-level source + * directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/* + * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator. 
+ */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + 
inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*76); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include 
"kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: CubicSplineTable + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = 
gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*171); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: CubicSplineTable + * Geometry: 
Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + 
_fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: CubicSplineTable + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. 
+ * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + 
real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - 
j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*444); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: CubicSplineTable + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; 
long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: CubicSplineTable + * 
Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*200); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: CubicSplineTable + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void 
+nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; 
+ _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: CubicSplineTable + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner 
or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 
rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + 
j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*476); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: CubicSplineTable + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + 
+#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = 
gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*59); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include 
"../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 
rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*154); +} +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + 
/* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + 
_fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + 
gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*427); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; 
long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: LennardJones + * Geometry: 
Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*176); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double + 
(t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = 
gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. 
+ * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 
rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + 
j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*452); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include 
"kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = 
fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 8 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*46); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void 
+nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = 
fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*141); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + 
inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the two positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + 
fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*414); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 
0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the two positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = 
gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * 
gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the two positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + vftab = kernel_data->table_elec->data; + vftabscale = 
gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*141); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the two positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + 
inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the two positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + 
fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*414); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: CubicSplineTable + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 
0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the two positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = 
gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * 
gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the two positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = 
kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*66); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the two positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include 
"kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: CubicSplineTable + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the two positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + 
_fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*131); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: CubicSplineTable + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void 
+nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { 
_fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: CubicSplineTable + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + 
_fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer 
iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*314); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: CubicSplineTable + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 
dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = 
_fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: CubicSplineTable + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = 
kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*155); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: CubicSplineTable + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = 
kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: CubicSplineTable + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner 
iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*341); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: CubicSplineTable + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include 
"kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = 
ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*43); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: LennardJones + * 
Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 
simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*108); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. 
+ * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 
= _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri 
= nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*291); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW 
interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 
dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 
0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = 
_fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*131); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = 
_fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + 
_fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + 
inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*317); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 
dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = 
gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 8 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*31); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * 
gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + 
+/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + 
+ nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*96); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + 
+#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 
dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* 
Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*279); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = 
mdatoms->chargeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + 
/* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*96); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + 
+#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 
dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* 
Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*279); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: Coulomb + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = 
mdatoms->chargeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = 
_fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*67); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = 
_fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + 
ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*168); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. 
+ * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = 
fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. 
+ * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 
= _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*459); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + 
union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + 
outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald 
= gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*194); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* 
Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = 
nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * 
gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 
dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = 
_fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*488); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = 
_fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - 
j_index_start; + + /* Outer loop uses 8 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*49); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include 
"nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = 
gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*150); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * 
gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = 
gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = 
fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*441); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Water3 + * 
Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + 
_fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + 
j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* When 
we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*150); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* When we 
use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = 
fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*441); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Water4 + * 
Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + 
_fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + 
j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = 
fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*86); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for 
non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + 
ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. 
+ * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = 
gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer 
iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*225); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + 
int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 
15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 
dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + 
/* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*630); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 
rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = 
_fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = 
mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* 
Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*269); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int 
nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = 
gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice 
*/ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*677); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or 
outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 
rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = 
_fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in 
the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = 
_fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 8 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*68); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch 
parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = 
_fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*207); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + 
nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; 
+ + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void 
+nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 
dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + 
rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*612); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; 
} vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 
60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = 
_fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*207); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + 
nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; 
+ + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void 
+nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 
dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + 
rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*612); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; 
} vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 
60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Avoid stupid compiler warnings */ + jnrA = 
jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*78); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Avoid stupid compiler warnings */ + jnrA = 
jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: CubicSplineTable + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = 
gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*169); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: CubicSplineTable + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = 
fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: CubicSplineTable + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + 
_fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + 
gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*430); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: CubicSplineTable + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + 
_fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" 
+#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: CubicSplineTable + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 
velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* 
Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*194); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: CubicSplineTable + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + 
nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: CubicSplineTable + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid 
compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*458); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: CubicSplineTable + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid 
compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + 
gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*56); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include 
"../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int 
*vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*147); +} +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * 
gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 
rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = 
_fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*408); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + 
union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + 
_fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*170); +} +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 
one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * 
gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 
dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = 
gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*434); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif 
+ +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 8 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*44); +} +/* + * Gromacs nonbonded 
kernel: nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = 
gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* Avoid 
stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*135); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* Avoid 
stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = 
fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*396); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + 
nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 
itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: 
PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = 
nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*135); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* Avoid 
stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = 
fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*396); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: Ewald + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + 
nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + _fjsp_v2r8 
itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: GeneralizedBorn + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate 
force/pot: PotentialAndForce + */ +void +nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; + _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); + real *invsqrta,*dvda,*gbtab; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } 
vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + invsqrta = fr->invsqrta; + dvda = fr->dvda; + gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab.scale); + gbtab = fr->gbtab.data; + gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0)); + gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 10 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*95); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: GeneralizedBorn + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t 
* gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; + _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); + real *invsqrta,*dvda,*gbtab; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + 
charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + invsqrta = fr->invsqrta; + dvda = fr->dvda; + gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab.scale); + gbtab = fr->gbtab.data; + gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: GeneralizedBorn + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; + _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); + real *invsqrta,*dvda,*gbtab; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + invsqrta = fr->invsqrta; + dvda = fr->dvda; + gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab.scale); + gbtab = fr->gbtab.data; + gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); + + /* Avoid stupid compiler warnings */ + jnrA = 
jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0)); + gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 10 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*10 + inneriter*74); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: GeneralizedBorn + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; + _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); + real *invsqrta,*dvda,*gbtab; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + invsqrta = fr->invsqrta; + dvda = fr->dvda; + gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab.scale); + gbtab = fr->gbtab.data; + gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); + + /* Avoid stupid compiler warnings */ + jnrA = 
jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: GeneralizedBorn + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; + _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); + real *invsqrta,*dvda,*gbtab; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + invsqrta = fr->invsqrta; + dvda = fr->dvda; + gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab.scale); + gbtab = fr->gbtab.data; + gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid); + dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai0,isai0)); + 
gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*9 + inneriter*61); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: GeneralizedBorn + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecGB_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; + _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); + real *invsqrta,*dvda,*gbtab; + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + + invsqrta = fr->invsqrta; + dvda = fr->dvda; + gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab.scale); + gbtab = fr->gbtab.data; + gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* 
+ * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: None + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = 
nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 7 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*59); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: None + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: 
None + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + nvdwtype = fr->ntype; + vdwparam 
= fr->nbfp; + vdwtype = mdatoms->typeA; + + rcutoff_scalar = fr->rvdw; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 7 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*44); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: None + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + rcutoff_scalar = fr->rvdw; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: None + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = 
nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + rcutoff_scalar = fr->rvdw; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 7 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*62); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: None + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + 
nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + rcutoff_scalar = fr->rvdw; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = 
gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: None + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 7 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*35); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: None + * VdW interaction: 
LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; 
+ + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = 
_fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*75); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = 
_fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + 
vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*156); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = 
fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; 
long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + 
for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*387); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; 
long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + 
for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = 
gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*179); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the 
inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; 
+ shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner 
or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 
rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + 
qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*413); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + 
rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + 
j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*57); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + 
j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = 
_fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*138); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup 
water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri 
= nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + 
gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*369); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri 
= nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" 
+#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; 
+ int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - 
j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*164); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = 
mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + 
_fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB 
= 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*398); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + 
_fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB 
= 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = 
gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*73); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 
= _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific 
parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*154); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate 
force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = 
gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx 
+#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { 
_fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = 
gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*385); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { 
_fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = 
gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = 
gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*182); +} +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; 
+ _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 
60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch 
= gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*416); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = 
gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = 
gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + rswitch_scalar = fr->rvdw_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 8 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + 
inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*39); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = 
gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = 
gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*120); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = 
gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = 
mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*351); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec 
* gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + 
_fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + 
iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*120); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle 
indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = 
_fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = 
mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*351); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec 
* gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + 
_fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + 
x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*70); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. 
for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter 
= 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + 
vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*143); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + 
vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; 
long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment 
number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*350); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; 
long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + 
real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*167); +} +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; 
+ _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void 
+nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 
dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = 
gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*377); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: CubicSplineTable + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 
rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; 
iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 9 
flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*47); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: 
nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + 
_fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*120); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + 
t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec 
= fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri 
= nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 20 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*327); +} +/* + * Gromacs 
nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 
dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + vdwjidx0A = 2*vdwtype[inr+0]; + qq00 = _fjsp_mul_v2r8(iq0,jq0); + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = 
_fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = 
mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*143); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = 
mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + 
_fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer 
loop uses 26 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*353); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: LennardJones + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + _fjsp_v2r8 itab_tmp; + 
_fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + vdwjidx0A = 2*vdwtype[inr+0]; + c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); + c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded 
kernel: nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); 
+ krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 8 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*35); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Particle-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict 
xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = 
gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*108); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water3-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + 
for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = 
mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*315); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water3-Water3 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer 
loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset0; + _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; + _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; + _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = 
nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + + jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + qq00 = _fjsp_mul_v2r8(iq0,jq0); + qq01 = _fjsp_mul_v2r8(iq0,jq1); + qq02 = _fjsp_mul_v2r8(iq0,jq2); + qq10 = _fjsp_mul_v2r8(iq1,jq0); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq20 = _fjsp_mul_v2r8(iq2,jq0); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 
0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = 
_fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*108); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water4-Particle + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx0A,vdwjidx0B; + _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; + _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; + _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; + _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + 
for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" + +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: PotentialAndForce + */ +void +nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = 
mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses 19 flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*315); +} +/* + * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + * Electrostatics interaction: ReactionField + * VdW interaction: None + * Geometry: Water4-Water4 + * Calculate force/pot: Force + */ +void +nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer 
loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. + */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + int vdwioffset1; + _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; + int vdwioffset2; + _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; + int vdwioffset3; + _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; + int vdwjidx1A,vdwjidx1B; + _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; + int vdwjidx2A,vdwjidx2B; + _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; + int vdwjidx3A,vdwjidx3B; + _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; + _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; + _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; + _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; + _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; + _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; + _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; + _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; + _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; + _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = 
nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); + iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); + iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); + + jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); + jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); + jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); + qq11 = _fjsp_mul_v2r8(iq1,jq1); + qq12 = _fjsp_mul_v2r8(iq1,jq2); + qq13 = _fjsp_mul_v2r8(iq1,jq3); + qq21 = _fjsp_mul_v2r8(iq2,jq1); + qq22 = _fjsp_mul_v2r8(iq2,jq2); + qq23 = _fjsp_mul_v2r8(iq2,jq3); + qq31 = _fjsp_mul_v2r8(iq3,jq1); + qq32 = _fjsp_mul_v2r8(iq3,jq2); + qq33 = _fjsp_mul_v2r8(iq3,jq3); + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidx +#endif + +#include + +#include "../nb_kernel.h" +#include "types/simple.h" +#include "vec.h" +#include "nrnb.h" + +#include "kernelutil_sparc64_hpc_ace_double.h" +/* #endif */ + +/* ## List of variables set by the generating script: */ +/* ## */ +/* ## Setttings that apply to the entire kernel: */ +/* ## KERNEL_ELEC: String, choice for electrostatic interactions */ +/* ## KERNEL_VDW: String, choice for van der Waals interactions */ +/* ## KERNEL_NAME: String, name of this kernel */ +/* ## KERNEL_VF: String telling if we calculate potential, force, or both */ +/* ## GEOMETRY_I/GEOMETRY_J: String, name of each geometry, e.g. 
'Water3' or '1Particle' */ +/* ## */ +/* ## Settings that apply to particles in the outer (I) or inner (J) loops: */ +/* ## PARTICLES_I[]/ Arrays with lists of i/j particles to use in kernel. It is */ +/* ## PARTICLES_J[]: just [0] for particle geometry, but can be longer for water */ +/* ## PARTICLES_ELEC_I[]/ Arrays with lists of i/j particles that have electrostatics */ +/* ## PARTICLES_ELEC_J[]: interactions that should be calculated in this kernel. */ +/* ## PARTICLES_VDW_I[]/ Arrays with the list of i/j particles that have VdW */ +/* ## PARTICLES_VDW_J[]: interactions that should be calculated in this kernel. */ +/* ## */ +/* ## Settings for pairs of interactions (e.g. 2nd i particle against 1st j particle) */ +/* ## PAIRS_IJ[]: Array with (i,j) tuples of pairs for which interactions */ +/* ## should be calculated in this kernel. Zero-charge particles */ +/* ## do not have interactions with particles without vdw, and */ +/* ## Vdw-only interactions are not evaluated in a no-vdw-kernel. */ +/* ## INTERACTION_FLAGS[][]: 2D matrix, dimension e.g. 3*3 for water-water interactions. */ +/* ## For each i-j pair, the element [I][J] is a list of strings */ +/* ## defining properties/flags of this interaction. Examples */ +/* ## include 'electrostatics'/'vdw' if that type of interaction */ +/* ## should be evaluated, 'rsq'/'rinv'/'rinvsq' if those values */ +/* ## are needed, and 'exactcutoff' or 'shift','switch' to */ +/* ## decide if the force/potential should be modified. This way */ +/* ## we only calculate values absolutely needed for each case.
*/ + +/* ## Calculate the size and offset for (merged/interleaved) table data */ + +/* + * Gromacs nonbonded kernel: {KERNEL_NAME} + * Electrostatics interaction: {KERNEL_ELEC} + * VdW interaction: {KERNEL_VDW} + * Geometry: {GEOMETRY_I}-{GEOMETRY_J} + * Calculate force/pot: {KERNEL_VF} + */ +void +{KERNEL_NAME} + (t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + /* ## Not all variables are used for all kernels, but any optimizing compiler fixes that, */ + /* ## so there is no point in going to extremes to exclude variables that are not needed. */ + /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or + * just 0 for non-waters. + * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different + * jnr indices corresponding to data put in the four positions in the SIMD register. 
+ */ + int i_shift_offset,i_coord_offset,outeriter,inneriter; + int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; + int jnrA,jnrB; + int j_coord_offsetA,j_coord_offsetB; + int *iinr,*jindex,*jjnr,*shiftidx,*gid; + real rcutoff_scalar; + real *shiftvec,*fshift,*x,*f; + _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; + /* #for I in PARTICLES_I */ + int vdwioffset{I}; + _fjsp_v2r8 ix{I},iy{I},iz{I},fix{I},fiy{I},fiz{I},iq{I},isai{I}; + /* #endfor */ + /* #for J in PARTICLES_J */ + int vdwjidx{J}A,vdwjidx{J}B; + _fjsp_v2r8 jx{J},jy{J},jz{J},fjx{J},fjy{J},fjz{J},jq{J},isaj{J}; + /* #endfor */ + /* #for I,J in PAIRS_IJ */ + _fjsp_v2r8 dx{I}{J},dy{I}{J},dz{I}{J},rsq{I}{J},rinv{I}{J},rinvsq{I}{J},r{I}{J},qq{I}{J},c6_{I}{J},c12_{I}{J}; + /* #endfor */ + /* #if KERNEL_ELEC != 'None' */ + _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; + real *charge; + /* #endif */ + /* #if 'GeneralizedBorn' in KERNEL_ELEC */ + _fjsp_v2r8 vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,twogbeps,dvdatmp; + _fjsp_v2r8 minushalf = gmx_fjsp_set1_v2r8(-0.5); + real *invsqrta,*dvda,*gbtab; + /* #endif */ + /* #if KERNEL_VDW != 'None' */ + int nvdwtype; + _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; + int *vdwtype; + real *vdwparam; + _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); + _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); + /* #endif */ + /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ + _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; + real *vftab; + /* #endif */ + /* #if 'Ewald' in KERNEL_ELEC */ + _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; + real *ewtab; + /* #endif */ + /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */ + _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; + real rswitch_scalar,d_scalar; + /* #endif */ + _fjsp_v2r8 itab_tmp; + _fjsp_v2r8 
dummy_mask,cutoff_mask; + _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); + _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); + union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; + + x = xx[0]; + f = ff[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + shiftidx = nlist->shift; + gid = nlist->gid; + shiftvec = fr->shift_vec[0]; + fshift = fr->fshift[0]; + /* #if KERNEL_ELEC != 'None' */ + facel = gmx_fjsp_set1_v2r8(fr->epsfac); + charge = mdatoms->chargeA; + /* #if 'ReactionField' in KERNEL_ELEC */ + krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); + krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); + crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); + /* #endif */ + /* #endif */ + /* #if KERNEL_VDW != 'None' */ + nvdwtype = fr->ntype; + vdwparam = fr->nbfp; + vdwtype = mdatoms->typeA; + /* #endif */ + + /* #if 'Table' in KERNEL_ELEC and 'Table' in KERNEL_VDW */ + vftab = kernel_data->table_elec_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); + /* #elif 'Table' in KERNEL_ELEC */ + vftab = kernel_data->table_elec->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); + /* #elif 'Table' in KERNEL_VDW */ + vftab = kernel_data->table_vdw->data; + vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); + /* #endif */ + + /* #if 'Ewald' in KERNEL_ELEC */ + sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); + /* #if KERNEL_VF=='Force' and KERNEL_MOD_ELEC!='PotentialSwitch' */ + ewtab = fr->ic->tabq_coul_F; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + /* #else */ + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); + ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); + /* #endif */ + /* #endif */ + + /* #if KERNEL_ELEC=='GeneralizedBorn' */ + invsqrta = fr->invsqrta; + dvda = fr->dvda; + gbtabscale = gmx_fjsp_set1_v2r8(fr->gbtab.scale); + gbtab = fr->gbtab.data; + 
gbinvepsdiff = gmx_fjsp_set1_v2r8((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent)); + /* #endif */ + + /* #if 'Water' in GEOMETRY_I */ + /* Setup water-specific parameters */ + inr = nlist->iinr[0]; + /* #for I in PARTICLES_ELEC_I */ + iq{I} = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+{I}])); + /* #endfor */ + /* #for I in PARTICLES_VDW_I */ + vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}]; + /* #endfor */ + /* #endif */ + + /* #if 'Water' in GEOMETRY_J */ + /* #for J in PARTICLES_ELEC_J */ + jq{J} = gmx_fjsp_set1_v2r8(charge[inr+{J}]); + /* #endfor */ + /* #for J in PARTICLES_VDW_J */ + vdwjidx{J}A = 2*vdwtype[inr+{J}]; + /* #endfor */ + /* #for I,J in PAIRS_IJ */ + /* #if 'electrostatics' in INTERACTION_FLAGS[I][J] */ + qq{I}{J} = _fjsp_mul_v2r8(iq{I},jq{J}); + /* #endif */ + /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ + c6_{I}{J} = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A]); + c12_{I}{J} = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A+1]); + /* #endif */ + /* #endfor */ + /* #endif */ + + /* #if KERNEL_MOD_ELEC!='None' or KERNEL_MOD_VDW!='None' */ + /* #if KERNEL_ELEC!='None' */ + /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ + rcutoff_scalar = fr->rcoulomb; + /* #else */ + rcutoff_scalar = fr->rvdw; + /* #endif */ + rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); + rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); + /* #endif */ + + /* #if KERNEL_MOD_VDW=='PotentialShift' */ + sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); + rvdw = gmx_fjsp_set1_v2r8(fr->rvdw); + /* #endif */ + + /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */ + /* #if KERNEL_MOD_ELEC=='PotentialSwitch' */ + rswitch_scalar = fr->rcoulomb_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* #else */ + rswitch_scalar = fr->rvdw_switch; + rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); + /* #endif */ + /* Setup switch parameters */ + d_scalar = rcutoff_scalar-rswitch_scalar; + 
d = gmx_fjsp_set1_v2r8(d_scalar); + swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); + swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + /* #if 'Force' in KERNEL_VF */ + swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); + swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); + swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); + /* #endif */ + /* #endif */ + + /* Avoid stupid compiler warnings */ + jnrA = jnrB = 0; + j_coord_offsetA = 0; + j_coord_offsetB = 0; + + /* ## Keep track of the floating point operations we issue for reporting! */ + /* #define OUTERFLOPS 0 */ + outeriter = 0; + inneriter = 0; + + /* Start outer loop over neighborlists */ + for(iidx=0; iidxenergygrp_elec+ggid); + /* #define OUTERFLOPS OUTERFLOPS+1 */ + /* #endif */ + /* #if 'GeneralizedBorn' in KERNEL_ELEC */ + gmx_fjsp_update_1pot_v2r8(vgbsum,kernel_data->energygrp_polarization+ggid); + /* #define OUTERFLOPS OUTERFLOPS+1 */ + /* #endif */ + /* #if KERNEL_VDW != 'None' */ + gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); + /* #define OUTERFLOPS OUTERFLOPS+1 */ + /* #endif */ + /* #endif */ + /* #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */ + dvdasum = _fjsp_mul_v2r8(dvdasum, _fjsp_mul_v2r8(isai{I},isai{I})); + gmx_fjsp_update_1pot_v2r8(dvdasum,dvda+inr); + /* #endif */ + + /* Increment number of inner iterations */ + inneriter += j_index_end - j_index_start; + + /* Outer loop uses {OUTERFLOPS} flops */ + } + + /* Increment number of outer iterations */ + outeriter += nri; + + /* Update outer/inner flops */ + /* ## NB: This is not important, it just affects the flopcount. 
However, since our preprocessor is */ + /* ## primitive and replaces aggressively even in strings inside these directives, we need to */ + /* ## assemble the main part of the name (containing KERNEL/ELEC/VDW) directly in the source. */ + /* #if GEOMETRY_I == 'Water3' */ + /* #define ISUFFIX '_W3' */ + /* #elif GEOMETRY_I == 'Water4' */ + /* #define ISUFFIX '_W4' */ + /* #else */ + /* #define ISUFFIX '' */ + /* #endif */ + /* #if GEOMETRY_J == 'Water3' */ + /* #define JSUFFIX 'W3' */ + /* #elif GEOMETRY_J == 'Water4' */ + /* #define JSUFFIX 'W4' */ + /* #else */ + /* #define JSUFFIX '' */ + /* #endif */ + /* #if 'PotentialAndForce' in KERNEL_VF */ + /* #define VFSUFFIX '_VF' */ + /* #elif 'Potential' in KERNEL_VF */ + /* #define VFSUFFIX '_V' */ + /* #else */ + /* #define VFSUFFIX '_F' */ + /* #endif */ + + /* #if KERNEL_ELEC != 'None' and KERNEL_VDW != 'None' */ + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS}); + /* #elif KERNEL_ELEC != 'None' */ + inc_nrnb(nrnb,eNR_NBKERNEL_ELEC{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS}); + /* #else */ + inc_nrnb(nrnb,eNR_NBKERNEL_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS}); + /* #endif */ +} diff --git a/src/gmxlib/nonbonded/nonbonded.c b/src/gmxlib/nonbonded/nonbonded.c index 45288ee6f4..0a576ad075 100644 --- a/src/gmxlib/nonbonded/nonbonded.c +++ b/src/gmxlib/nonbonded/nonbonded.c @@ -98,6 +98,9 @@ #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE) # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h" #endif +#if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE) +# include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h" +#endif #ifdef GMX_THREAD_MPI @@ -150,6 +153,9 @@ gmx_nonbonded_setup(FILE * fplog, #endif #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE) 
nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size); +#endif +#if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double,kernellist_sparc64_hpc_ace_double_size); #endif ; /* empty statement to avoid a completely empty block */ } @@ -215,6 +221,10 @@ gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl) #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE) /* No padding - see comment above */ { "sse4_1_double", 1 }, +#endif +#if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE) + /* No padding - see comment above */ + { "sparc64_hpc_ace_double", 1 }, #endif { "c", 1 }, };