src/gromacs/ewald/pme-spline-work.cpp: warning: includes "simd.h" unnecessarily
src/gromacs/ewald/pme-spline-work.h: warning: includes "simd.h" unnecessarily
src/gromacs/ewald/pme-spread.cpp: warning: includes "simd.h" unnecessarily
-src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h: warning: includes "simd.h" unnecessarily
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h: warning: should include "simd.h"
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h: warning: should include "simd.h"
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h: warning: should include "simd.h"
# These would be nice to fix, but can wait for later / deletion / rewrites
src/gromacs/gmxlib/nonbonded/nb_kernel_*/*: warning: includes "config.h" unnecessarily
-src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h: warning: should include "config.h"
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp: warning: includes "config.h" unnecessarily
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.cpp: warning: includes "config.h" unnecessarily
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_common.h: warning: should include "config.h"
file(GLOB NONBONDED_AVX_256_DOUBLE_SOURCES nb_kernel_avx_256_double/*.cpp)
endif()
-if("${GMX_SIMD_ACTIVE}" STREQUAL "SPARC64_HPC_ACE" AND GMX_DOUBLE)
- file(GLOB NONBONDED_SPARC64_HPC_ACE_DOUBLE_SOURCES nb_kernel_sparc64_hpc_ace_double/*.cpp)
-endif()
-
-
# These sources will be used in the parent directory's CMakeLists.txt
set(NONBONDED_KERNEL_SOURCES ${NONBONDED_C_SOURCES} ${NONBONDED_SSE2_SINGLE_SOURCES} ${NONBONDED_SSE4_1_SINGLE_SOURCES} ${NONBONDED_AVX_128_FMA_SINGLE_SOURCES} ${NONBONDED_AVX_256_SINGLE_SOURCES} ${NONBONDED_SSE2_DOUBLE_SOURCES} ${NONBONDED_SSE4_1_DOUBLE_SOURCES} ${NONBONDED_AVX_128_FMA_DOUBLE_SOURCES} ${NONBONDED_AVX_256_DOUBLE_SOURCES} ${NONBONDED_SPARC64_HPC_ACE_DOUBLE_SOURCES})
target_sources(libgromacs_generated PRIVATE ${NONBONDED_KERNEL_SOURCES})
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef _kernelutil_sparc64_hpc_ace_double_h_
-#define _kernelutil_sparc64_hpc_ace_double_h_
-
-/* Get gmx_simd_exp_d() */
-#include "gromacs/simd/simd.h"
-#include "gromacs/simd/simd_math.h"
-
-/* Fujitsu header borrows the name from SSE2, since some instructions have aliases.
- * Environment/compiler version GM-1.2.0-17 seems to be buggy; when -Xg is
- * defined to enable GNUC extensions, this sets _ISOC99_SOURCE, which in
- * turn causes all intrinsics to be declared inline _instead_ of static. This
- * leads to duplicate symbol errors at link time.
- * To work around this we unset this before including the HPC-ACE header, and
- * reset the value afterwards.
- */
-#ifdef _ISOC99_SOURCE
-# undef _ISOC99_SOURCE
-# define SAVE_ISOC99_SOURCE
-#endif
-
-#include <emmintrin.h>
-
-#ifdef SAVE_ISOC99_SOURCE
-# define _ISOC99_SOURCE
-# undef SAVE_ISOC99_SOURCE
-#endif
-
-#define GMX_FJSP_SHUFFLE2(x, y) (((x)<<1) | (y))
-
-#define GMX_FJSP_TRANSPOSE2_V2R8(row0, row1) { \
- _fjsp_v2r8 __gmx_t1 = row0; \
- row0 = _fjsp_unpacklo_v2r8(row0, row1); \
- row1 = _fjsp_unpackhi_v2r8(__gmx_t1, row1); \
-}
-
-
-static void
-gmx_fjsp_print_v2r8(const char *s, _fjsp_v2r8 a)
-{
- double lo, hi;
-
- _fjsp_storel_v2r8(&lo, a);
- _fjsp_storeh_v2r8(&hi, a);
- printf("%s: %g %g\n", s, lo, hi);
-}
-
-
-static _fjsp_v2r8
-gmx_fjsp_set1_v2r8(double d)
-{
- return _fjsp_set_v2r8(d, d);
-}
-
-static _fjsp_v2r8
-gmx_fjsp_load1_v2r8(const double * gmx_restrict ptr)
-{
- return gmx_fjsp_set1_v2r8(*ptr);
-}
-
-
-static int
-gmx_fjsp_any_lt_v2r8(_fjsp_v2r8 a, _fjsp_v2r8 b)
-{
- union
- {
- double d;
- long long int i;
- }
- conv;
-
- a = _fjsp_cmplt_v2r8(a, b);
- a = _fjsp_or_v2r8(a, _fjsp_unpackhi_v2r8(a, a));
- _fjsp_storel_v2r8(&(conv.d), a);
- return (conv.i != 0);
-}
-
-/* 1.0/sqrt(x) */
-static gmx_inline _fjsp_v2r8
-gmx_fjsp_invsqrt_v2r8(_fjsp_v2r8 x)
-{
- const _fjsp_v2r8 half = gmx_fjsp_set1_v2r8(0.5);
- const _fjsp_v2r8 three = gmx_fjsp_set1_v2r8(3.0);
- _fjsp_v2r8 lu = _fjsp_rsqrta_v2r8(x);
-
- lu = _fjsp_mul_v2r8(_fjsp_mul_v2r8(half, lu), _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu, lu), x, three));
- /* The HPC-ACE instruction set is only available in double precision, while
- * single precision is typically sufficient for Gromacs. If you define
- * "GMX_RELAXED_DOUBLE_PRECISION" during compile, we stick to two Newton-Raphson
- * iterations and accept 32bits of accuracy in 1.0/sqrt(x) and 1.0/x, rather than full
- * double precision (53 bits). This is still clearly higher than single precision (24 bits).
- */
-#ifndef GMX_RELAXED_DOUBLE_PRECISION
- lu = _fjsp_mul_v2r8(_fjsp_mul_v2r8(half, lu), _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu, lu), x, three));
-#endif
- return _fjsp_mul_v2r8(_fjsp_mul_v2r8(half, lu), _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu, lu), x, three));
-}
-
-
-/* 1.0/x */
-static gmx_inline _fjsp_v2r8
-gmx_fjsp_inv_v2r8(_fjsp_v2r8 x)
-{
- const _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- __m128d lu = _fjsp_rcpa_v2r8(x);
-
- /* Perform three N-R steps for double precision */
- lu = _fjsp_mul_v2r8(lu, _fjsp_nmsub_v2r8(lu, x, two));
- /* The HPC-ACE instruction set is only available in double precision, while
- * single precision is typically sufficient for Gromacs. If you define
- * "GMX_RELAXED_DOUBLE_PRECISION" during compile, we stick to two Newton-Raphson
- * iterations and accept 32bits of accuracy in 1.0/sqrt(x) and 1.0/x, rather than full
- * double precision (53 bits). This is still clearly higher than single precision (24 bits).
- */
-#ifndef GMX_RELAXED_DOUBLE_PRECISION
- lu = _fjsp_mul_v2r8(lu, _fjsp_nmsub_v2r8(lu, x, two));
-#endif
- return _fjsp_mul_v2r8(lu, _fjsp_nmsub_v2r8(lu, x, two));
-}
-
-
-static gmx_inline _fjsp_v2r8
-gmx_fjsp_calc_rsq_v2r8(_fjsp_v2r8 dx, _fjsp_v2r8 dy, _fjsp_v2r8 dz)
-{
- return _fjsp_madd_v2r8(dx, dx, _fjsp_madd_v2r8(dy, dy, _fjsp_mul_v2r8(dz, dz)));
-}
-
-/* Normal sum of four ymm registers */
-#define gmx_fjsp_sum4_v2r8(t0, t1, t2, t3) _fjsp_add_v2r8(_fjsp_add_v2r8(t0, t1), _fjsp_add_v2r8(t2, t3))
-
-
-
-
-
-static _fjsp_v2r8
-gmx_fjsp_load_2real_swizzle_v2r8(const double * gmx_restrict ptrA,
- const double * gmx_restrict ptrB)
-{
- return _fjsp_unpacklo_v2r8(_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA), _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB));
-}
-
-static _fjsp_v2r8
-gmx_fjsp_load_1real_v2r8(const double * gmx_restrict ptrA)
-{
- return _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA);
-}
-
-
-static void
-gmx_fjsp_store_2real_swizzle_v2r8(double * gmx_restrict ptrA,
- double * gmx_restrict ptrB,
- _fjsp_v2r8 xmm1)
-{
- _fjsp_v2r8 t2;
-
- t2 = _fjsp_unpackhi_v2r8(xmm1, xmm1);
- _fjsp_storel_v2r8(ptrA, xmm1);
- _fjsp_storel_v2r8(ptrB, t2);
-}
-
-static void
-gmx_fjsp_store_1real_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 xmm1)
-{
- _fjsp_storel_v2r8(ptrA, xmm1);
-}
-
-
-/* Similar to store, but increments value in memory */
-static void
-gmx_fjsp_increment_2real_swizzle_v2r8(double * gmx_restrict ptrA,
- double * gmx_restrict ptrB, _fjsp_v2r8 xmm1)
-{
- _fjsp_v2r8 t1;
-
- t1 = _fjsp_unpackhi_v2r8(xmm1, xmm1);
- xmm1 = _fjsp_add_v2r8(xmm1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA));
- t1 = _fjsp_add_v2r8(t1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB));
- _fjsp_storel_v2r8(ptrA, xmm1);
- _fjsp_storel_v2r8(ptrB, t1);
-}
-
-static void
-gmx_fjsp_increment_1real_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 xmm1)
-{
- _fjsp_v2r8 tmp;
-
- tmp = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA);
- tmp = _fjsp_add_v2r8(tmp, xmm1);
- _fjsp_storel_v2r8(ptrA, tmp);
-}
-
-
-
-static gmx_inline void
-gmx_fjsp_load_2pair_swizzle_v2r8(const double * gmx_restrict p1,
- const double * gmx_restrict p2,
- _fjsp_v2r8 * gmx_restrict c6,
- _fjsp_v2r8 * gmx_restrict c12)
-{
- _fjsp_v2r8 t1, t2, t3;
-
- /* The c6/c12 array should be aligned */
- t1 = _fjsp_load_v2r8(p1);
- t2 = _fjsp_load_v2r8(p2);
- *c6 = _fjsp_unpacklo_v2r8(t1, t2);
- *c12 = _fjsp_unpackhi_v2r8(t1, t2);
-}
-
-static gmx_inline void
-gmx_fjsp_load_1pair_swizzle_v2r8(const double * gmx_restrict p1,
- _fjsp_v2r8 * gmx_restrict c6,
- _fjsp_v2r8 * gmx_restrict c12)
-{
- *c6 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1);
- *c12 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1);
-}
-
-
-static gmx_inline void
-gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift,
- const double * gmx_restrict xyz,
- _fjsp_v2r8 * gmx_restrict x1,
- _fjsp_v2r8 * gmx_restrict y1,
- _fjsp_v2r8 * gmx_restrict z1)
-{
- _fjsp_v2r8 mem_xy, mem_z, mem_sxy, mem_sz;
-
- mem_xy = _fjsp_load_v2r8(xyz);
- mem_z = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz+2);
- mem_sxy = _fjsp_load_v2r8(xyz_shift);
- mem_sz = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz_shift+2);
-
- mem_xy = _fjsp_add_v2r8(mem_xy, mem_sxy);
- mem_z = _fjsp_add_v2r8(mem_z, mem_sz);
-
- *x1 = _fjsp_shuffle_v2r8(mem_xy, mem_xy, GMX_FJSP_SHUFFLE2(0, 0));
- *y1 = _fjsp_shuffle_v2r8(mem_xy, mem_xy, GMX_FJSP_SHUFFLE2(1, 1));
- *z1 = _fjsp_shuffle_v2r8(mem_z, mem_z, GMX_FJSP_SHUFFLE2(0, 0));
-}
-
-
-static gmx_inline void
-gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift,
- const double * gmx_restrict xyz,
- _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
- _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
- _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3)
-{
- _fjsp_v2r8 t1, t2, t3, t4, t5, sxy, sz, szx, syz;
-
- t1 = _fjsp_load_v2r8(xyz);
- t2 = _fjsp_load_v2r8(xyz+2);
- t3 = _fjsp_load_v2r8(xyz+4);
- t4 = _fjsp_load_v2r8(xyz+6);
- t5 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz+8);
-
- sxy = _fjsp_load_v2r8(xyz_shift);
- sz = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz_shift+2);
- szx = _fjsp_shuffle_v2r8(sz, sxy, GMX_FJSP_SHUFFLE2(0, 0));
- syz = _fjsp_shuffle_v2r8(sxy, sz, GMX_FJSP_SHUFFLE2(0, 1));
-
- t1 = _fjsp_add_v2r8(t1, sxy);
- t2 = _fjsp_add_v2r8(t2, szx);
- t3 = _fjsp_add_v2r8(t3, syz);
- t4 = _fjsp_add_v2r8(t4, sxy);
- t5 = _fjsp_add_v2r8(t5, sz);
-
- *x1 = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(0, 0));
- *y1 = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(1, 1));
- *z1 = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(0, 0));
- *x2 = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(1, 1));
- *y2 = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(0, 0));
- *z2 = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(1, 1));
- *x3 = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(0, 0));
- *y3 = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(1, 1));
- *z3 = _fjsp_shuffle_v2r8(t5, t5, GMX_FJSP_SHUFFLE2(0, 0));
-}
-
-
-static gmx_inline void
-gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift,
- const double * gmx_restrict xyz,
- _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
- _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
- _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3,
- _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4)
-{
- _fjsp_v2r8 t1, t2, t3, t4, t5, t6, sxy, sz, szx, syz;
-
- t1 = _fjsp_load_v2r8(xyz);
- t2 = _fjsp_load_v2r8(xyz+2);
- t3 = _fjsp_load_v2r8(xyz+4);
- t4 = _fjsp_load_v2r8(xyz+6);
- t5 = _fjsp_load_v2r8(xyz+8);
- t6 = _fjsp_load_v2r8(xyz+10);
-
- sxy = _fjsp_load_v2r8(xyz_shift);
- sz = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz_shift+2);
- szx = _fjsp_shuffle_v2r8(sz, sxy, GMX_FJSP_SHUFFLE2(0, 0));
- syz = _fjsp_shuffle_v2r8(sxy, sz, GMX_FJSP_SHUFFLE2(0, 1));
-
- t1 = _fjsp_add_v2r8(t1, sxy);
- t2 = _fjsp_add_v2r8(t2, szx);
- t3 = _fjsp_add_v2r8(t3, syz);
- t4 = _fjsp_add_v2r8(t4, sxy);
- t5 = _fjsp_add_v2r8(t5, szx);
- t6 = _fjsp_add_v2r8(t6, syz);
-
- *x1 = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(0, 0));
- *y1 = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(1, 1));
- *z1 = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(0, 0));
- *x2 = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(1, 1));
- *y2 = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(0, 0));
- *z2 = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(1, 1));
- *x3 = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(0, 0));
- *y3 = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(1, 1));
- *z3 = _fjsp_shuffle_v2r8(t5, t5, GMX_FJSP_SHUFFLE2(0, 0));
- *x4 = _fjsp_shuffle_v2r8(t5, t5, GMX_FJSP_SHUFFLE2(1, 1));
- *y4 = _fjsp_shuffle_v2r8(t6, t6, GMX_FJSP_SHUFFLE2(0, 0));
- *z4 = _fjsp_shuffle_v2r8(t6, t6, GMX_FJSP_SHUFFLE2(1, 1));
-}
-
-
-
-static gmx_inline void
-gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1,
- _fjsp_v2r8 * gmx_restrict x, _fjsp_v2r8 * gmx_restrict y, _fjsp_v2r8 * gmx_restrict z)
-{
- *x = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1);
- *y = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1);
- *z = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+2);
-}
-
-static gmx_inline void
-gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1,
- _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
- _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
- _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3)
-{
- *x1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1);
- *y1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1);
- *z1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+2);
- *x2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+3);
- *y2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+4);
- *z2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+5);
- *x3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+6);
- *y3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+7);
- *z3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+8);
-}
-
-static gmx_inline void
-gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1,
- _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
- _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
- _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3,
- _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4)
-{
- *x1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1);
- *y1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1);
- *z1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+2);
- *x2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+3);
- *y2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+4);
- *z2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+5);
- *x3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+6);
- *y3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+7);
- *z3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+8);
- *x4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+9);
- *y4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+10);
- *z4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+11);
-}
-
-
-static gmx_inline void
-gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA,
- const double * gmx_restrict ptrB,
- _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1)
-{
- _fjsp_v2r8 t1, t2, t3, t4;
- t1 = _fjsp_load_v2r8(ptrA);
- t2 = _fjsp_load_v2r8(ptrB);
- t3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2);
- t4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+2);
- GMX_FJSP_TRANSPOSE2_V2R8(t1, t2);
- *x1 = t1;
- *y1 = t2;
- *z1 = _fjsp_unpacklo_v2r8(t3, t4);
-}
-
-static gmx_inline void
-gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA, const double * gmx_restrict ptrB,
- _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
- _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
- _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3)
-{
- _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, t8, t9, t10;
- t1 = _fjsp_load_v2r8(ptrA);
- t2 = _fjsp_load_v2r8(ptrB);
- t3 = _fjsp_load_v2r8(ptrA+2);
- t4 = _fjsp_load_v2r8(ptrB+2);
- t5 = _fjsp_load_v2r8(ptrA+4);
- t6 = _fjsp_load_v2r8(ptrB+4);
- t7 = _fjsp_load_v2r8(ptrA+6);
- t8 = _fjsp_load_v2r8(ptrB+6);
- t9 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+8);
- t10 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+8);
- GMX_FJSP_TRANSPOSE2_V2R8(t1, t2);
- GMX_FJSP_TRANSPOSE2_V2R8(t3, t4);
- GMX_FJSP_TRANSPOSE2_V2R8(t5, t6);
- GMX_FJSP_TRANSPOSE2_V2R8(t7, t8);
- *x1 = t1;
- *y1 = t2;
- *z1 = t3;
- *x2 = t4;
- *y2 = t5;
- *z2 = t6;
- *x3 = t7;
- *y3 = t8;
- *z3 = _fjsp_unpacklo_v2r8(t9, t10);
-}
-
-
-static gmx_inline void
-gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA, const double * gmx_restrict ptrB,
- _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1,
- _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2,
- _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3,
- _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4)
-{
- _fjsp_v2r8 t1, t2, t3, t4, t5, t6;
- t1 = _fjsp_load_v2r8(ptrA);
- t2 = _fjsp_load_v2r8(ptrB);
- t3 = _fjsp_load_v2r8(ptrA+2);
- t4 = _fjsp_load_v2r8(ptrB+2);
- t5 = _fjsp_load_v2r8(ptrA+4);
- t6 = _fjsp_load_v2r8(ptrB+4);
- GMX_FJSP_TRANSPOSE2_V2R8(t1, t2);
- GMX_FJSP_TRANSPOSE2_V2R8(t3, t4);
- GMX_FJSP_TRANSPOSE2_V2R8(t5, t6);
- *x1 = t1;
- *y1 = t2;
- *z1 = t3;
- *x2 = t4;
- *y2 = t5;
- *z2 = t6;
- t1 = _fjsp_load_v2r8(ptrA+6);
- t2 = _fjsp_load_v2r8(ptrB+6);
- t3 = _fjsp_load_v2r8(ptrA+8);
- t4 = _fjsp_load_v2r8(ptrB+8);
- t5 = _fjsp_load_v2r8(ptrA+10);
- t6 = _fjsp_load_v2r8(ptrB+10);
- GMX_FJSP_TRANSPOSE2_V2R8(t1, t2);
- GMX_FJSP_TRANSPOSE2_V2R8(t3, t4);
- GMX_FJSP_TRANSPOSE2_V2R8(t5, t6);
- *x3 = t1;
- *y3 = t2;
- *z3 = t3;
- *x4 = t4;
- *y4 = t5;
- *z4 = t6;
-}
-
-
-static void
-gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA,
- _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1)
-{
- _fjsp_v2r8 t1, t2, t3;
-
- t1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA);
- t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+1);
- t3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2);
-
- t1 = _fjsp_sub_v2r8(t1, x1);
- t2 = _fjsp_sub_v2r8(t2, y1);
- t3 = _fjsp_sub_v2r8(t3, z1);
- _fjsp_storel_v2r8(ptrA, t1);
- _fjsp_storel_v2r8(ptrA+1, t2);
- _fjsp_storel_v2r8(ptrA+2, t3);
-}
-
-static void
-gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 fscal,
- _fjsp_v2r8 dx1, _fjsp_v2r8 dy1, _fjsp_v2r8 dz1)
-{
- _fjsp_v2r8 t1, t2, t3;
-
- t1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA);
- t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+1);
- t3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2);
-
- t1 = _fjsp_nmsub_v2r8(fscal, dx1, t1);
- t2 = _fjsp_nmsub_v2r8(fscal, dy1, t2);
- t3 = _fjsp_nmsub_v2r8(fscal, dz1, t3);
- _fjsp_storel_v2r8(ptrA, t1);
- _fjsp_storel_v2r8(ptrA+1, t2);
- _fjsp_storel_v2r8(ptrA+2, t3);
-}
-
-
-static void
-gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA,
- _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1,
- _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2,
- _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3)
-{
- _fjsp_v2r8 t1, t2, t3, t4, t5;
-
- t1 = _fjsp_load_v2r8(ptrA);
- t2 = _fjsp_load_v2r8(ptrA+2);
- t3 = _fjsp_load_v2r8(ptrA+4);
- t4 = _fjsp_load_v2r8(ptrA+6);
- t5 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+8);
-
- x1 = _fjsp_unpacklo_v2r8(x1, y1);
- z1 = _fjsp_unpacklo_v2r8(z1, x2);
- y2 = _fjsp_unpacklo_v2r8(y2, z2);
- x3 = _fjsp_unpacklo_v2r8(x3, y3);
- /* nothing to be done for z3 */
-
- t1 = _fjsp_sub_v2r8(t1, x1);
- t2 = _fjsp_sub_v2r8(t2, z1);
- t3 = _fjsp_sub_v2r8(t3, y2);
- t4 = _fjsp_sub_v2r8(t4, x3);
- t5 = _fjsp_sub_v2r8(t5, z3);
- _fjsp_storel_v2r8(ptrA, t1);
- _fjsp_storeh_v2r8(ptrA+1, t1);
- _fjsp_storel_v2r8(ptrA+2, t2);
- _fjsp_storeh_v2r8(ptrA+3, t2);
- _fjsp_storel_v2r8(ptrA+4, t3);
- _fjsp_storeh_v2r8(ptrA+5, t3);
- _fjsp_storel_v2r8(ptrA+6, t4);
- _fjsp_storeh_v2r8(ptrA+7, t4);
- _fjsp_storel_v2r8(ptrA+8, t5);
-}
-
-
-static void
-gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA,
- _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1,
- _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2,
- _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3,
- _fjsp_v2r8 x4, _fjsp_v2r8 y4, _fjsp_v2r8 z4)
-{
- _fjsp_v2r8 t1, t2, t3, t4, t5, t6;
-
- t1 = _fjsp_load_v2r8(ptrA);
- t2 = _fjsp_load_v2r8(ptrA+2);
- t3 = _fjsp_load_v2r8(ptrA+4);
- t4 = _fjsp_load_v2r8(ptrA+6);
- t5 = _fjsp_load_v2r8(ptrA+8);
- t6 = _fjsp_load_v2r8(ptrA+10);
-
- x1 = _fjsp_unpacklo_v2r8(x1, y1);
- z1 = _fjsp_unpacklo_v2r8(z1, x2);
- y2 = _fjsp_unpacklo_v2r8(y2, z2);
- x3 = _fjsp_unpacklo_v2r8(x3, y3);
- z3 = _fjsp_unpacklo_v2r8(z3, x4);
- y4 = _fjsp_unpacklo_v2r8(y4, z4);
-
- _fjsp_storel_v2r8(ptrA, _fjsp_sub_v2r8( t1, x1 ));
- _fjsp_storeh_v2r8(ptrA+1, _fjsp_sub_v2r8( t1, x1 ));
- _fjsp_storel_v2r8(ptrA+2, _fjsp_sub_v2r8( t2, z1 ));
- _fjsp_storeh_v2r8(ptrA+3, _fjsp_sub_v2r8( t2, z1 ));
- _fjsp_storel_v2r8(ptrA+4, _fjsp_sub_v2r8( t3, y2 ));
- _fjsp_storeh_v2r8(ptrA+5, _fjsp_sub_v2r8( t3, y2 ));
- _fjsp_storel_v2r8(ptrA+6, _fjsp_sub_v2r8( t4, x3 ));
- _fjsp_storeh_v2r8(ptrA+7, _fjsp_sub_v2r8( t4, x3 ));
- _fjsp_storel_v2r8(ptrA+8, _fjsp_sub_v2r8( t5, z3 ));
- _fjsp_storeh_v2r8(ptrA+9, _fjsp_sub_v2r8( t5, z3 ));
- _fjsp_storel_v2r8(ptrA+10, _fjsp_sub_v2r8( t6, y4 ));
- _fjsp_storeh_v2r8(ptrA+11, _fjsp_sub_v2r8( t6, y4 ));
-}
-
-static void
-gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB,
- _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1)
-{
- _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7;
-
- t1 = _fjsp_load_v2r8(ptrA);
- t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2);
- t3 = _fjsp_load_v2r8(ptrB);
- t4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+2);
-
- t5 = _fjsp_unpacklo_v2r8(x1, y1);
- t6 = _fjsp_unpackhi_v2r8(x1, y1);
- t7 = _fjsp_unpackhi_v2r8(z1, z1);
-
- t1 = _fjsp_sub_v2r8(t1, t5);
- t2 = _fjsp_sub_v2r8(t2, z1);
-
- t3 = _fjsp_sub_v2r8(t3, t6);
- t4 = _fjsp_sub_v2r8(t4, t7);
-
- _fjsp_storel_v2r8(ptrA, t1);
- _fjsp_storeh_v2r8(ptrA+1, t1);
- _fjsp_storel_v2r8(ptrA+2, t2);
- _fjsp_storel_v2r8(ptrB, t3);
- _fjsp_storeh_v2r8(ptrB+1, t3);
- _fjsp_storel_v2r8(ptrB+2, t4);
-}
-
-
-static void
-gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB,
- _fjsp_v2r8 fscal, _fjsp_v2r8 dx1, _fjsp_v2r8 dy1, _fjsp_v2r8 dz1)
-{
- _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, fscalA, fscalB;
-
- t1 = _fjsp_load_v2r8(ptrA);
- t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2);
- t3 = _fjsp_load_v2r8(ptrB);
- t4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+2);
- fscalA = _fjsp_unpacklo_v2r8(fscal, fscal);
- fscalB = _fjsp_unpackhi_v2r8(fscal, fscal);
-
- t5 = _fjsp_unpacklo_v2r8(dx1, dy1);
- t6 = _fjsp_unpackhi_v2r8(dx1, dy1);
- t7 = _fjsp_unpackhi_v2r8(dz1, dz1);
-
- t1 = _fjsp_nmsub_v2r8(fscalA, t5, t1);
- t2 = _fjsp_nmsub_v2r8(fscalA, dz1, t2);
-
- t3 = _fjsp_nmsub_v2r8(fscalB, t6, t3);
- t4 = _fjsp_nmsub_v2r8(fscalB, t7, t4);
-
- _fjsp_storel_v2r8(ptrA, t1);
- _fjsp_storeh_v2r8(ptrA+1, t1);
- _fjsp_storel_v2r8(ptrA+2, t2);
- _fjsp_storel_v2r8(ptrB, t3);
- _fjsp_storeh_v2r8(ptrB+1, t3);
- _fjsp_storel_v2r8(ptrB+2, t4);
-}
-
-
-static void
-gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB,
- _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1,
- _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2,
- _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3)
-{
- _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, t8, t9, t10;
- _fjsp_v2r8 tA, tB, tC, tD, tE, tF, tG, tH, tI;
-
- t1 = _fjsp_load_v2r8(ptrA);
- t2 = _fjsp_load_v2r8(ptrA+2);
- t3 = _fjsp_load_v2r8(ptrA+4);
- t4 = _fjsp_load_v2r8(ptrA+6);
- t5 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+8);
- t6 = _fjsp_load_v2r8(ptrB);
- t7 = _fjsp_load_v2r8(ptrB+2);
- t8 = _fjsp_load_v2r8(ptrB+4);
- t9 = _fjsp_load_v2r8(ptrB+6);
- t10 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+8);
-
- tA = _fjsp_unpacklo_v2r8(x1, y1);
- tB = _fjsp_unpackhi_v2r8(x1, y1);
- tC = _fjsp_unpacklo_v2r8(z1, x2);
- tD = _fjsp_unpackhi_v2r8(z1, x2);
- tE = _fjsp_unpacklo_v2r8(y2, z2);
- tF = _fjsp_unpackhi_v2r8(y2, z2);
- tG = _fjsp_unpacklo_v2r8(x3, y3);
- tH = _fjsp_unpackhi_v2r8(x3, y3);
- tI = _fjsp_unpackhi_v2r8(z3, z3);
-
- t1 = _fjsp_sub_v2r8(t1, tA);
- t2 = _fjsp_sub_v2r8(t2, tC);
- t3 = _fjsp_sub_v2r8(t3, tE);
- t4 = _fjsp_sub_v2r8(t4, tG);
- t5 = _fjsp_sub_v2r8(t5, z3);
-
- t6 = _fjsp_sub_v2r8(t6, tB);
- t7 = _fjsp_sub_v2r8(t7, tD);
- t8 = _fjsp_sub_v2r8(t8, tF);
- t9 = _fjsp_sub_v2r8(t9, tH);
- t10 = _fjsp_sub_v2r8(t10, tI);
-
- _fjsp_storel_v2r8(ptrA, t1);
- _fjsp_storeh_v2r8(ptrA+1, t1);
- _fjsp_storel_v2r8(ptrA+2, t2);
- _fjsp_storeh_v2r8(ptrA+3, t2);
- _fjsp_storel_v2r8(ptrA+4, t3);
- _fjsp_storeh_v2r8(ptrA+5, t3);
- _fjsp_storel_v2r8(ptrA+6, t4);
- _fjsp_storeh_v2r8(ptrA+7, t4);
- _fjsp_storel_v2r8(ptrA+8, t5);
- _fjsp_storel_v2r8(ptrB, t6);
- _fjsp_storeh_v2r8(ptrB+1, t6);
- _fjsp_storel_v2r8(ptrB+2, t7);
- _fjsp_storeh_v2r8(ptrB+3, t7);
- _fjsp_storel_v2r8(ptrB+4, t8);
- _fjsp_storeh_v2r8(ptrB+5, t8);
- _fjsp_storel_v2r8(ptrB+6, t9);
- _fjsp_storeh_v2r8(ptrB+7, t9);
- _fjsp_storel_v2r8(ptrB+8, t10);
-}
-
-
-static void
-gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB,
- _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1,
- _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2,
- _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3,
- _fjsp_v2r8 x4, _fjsp_v2r8 y4, _fjsp_v2r8 z4)
-{
- _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12;
- _fjsp_v2r8 tA, tB, tC, tD, tE, tF, tG, tH, tI, tJ, tK, tL;
-
- t1 = _fjsp_load_v2r8(ptrA);
- t2 = _fjsp_load_v2r8(ptrA+2);
- t3 = _fjsp_load_v2r8(ptrA+4);
- t4 = _fjsp_load_v2r8(ptrA+6);
- t5 = _fjsp_load_v2r8(ptrA+8);
- t6 = _fjsp_load_v2r8(ptrA+10);
- t7 = _fjsp_load_v2r8(ptrB);
- t8 = _fjsp_load_v2r8(ptrB+2);
- t9 = _fjsp_load_v2r8(ptrB+4);
- t10 = _fjsp_load_v2r8(ptrB+6);
- t11 = _fjsp_load_v2r8(ptrB+8);
- t12 = _fjsp_load_v2r8(ptrB+10);
-
- tA = _fjsp_unpacklo_v2r8(x1, y1);
- tB = _fjsp_unpackhi_v2r8(x1, y1);
- tC = _fjsp_unpacklo_v2r8(z1, x2);
- tD = _fjsp_unpackhi_v2r8(z1, x2);
- tE = _fjsp_unpacklo_v2r8(y2, z2);
- tF = _fjsp_unpackhi_v2r8(y2, z2);
- tG = _fjsp_unpacklo_v2r8(x3, y3);
- tH = _fjsp_unpackhi_v2r8(x3, y3);
- tI = _fjsp_unpacklo_v2r8(z3, x4);
- tJ = _fjsp_unpackhi_v2r8(z3, x4);
- tK = _fjsp_unpacklo_v2r8(y4, z4);
- tL = _fjsp_unpackhi_v2r8(y4, z4);
-
- t1 = _fjsp_sub_v2r8(t1, tA);
- t2 = _fjsp_sub_v2r8(t2, tC);
- t3 = _fjsp_sub_v2r8(t3, tE);
- t4 = _fjsp_sub_v2r8(t4, tG);
- t5 = _fjsp_sub_v2r8(t5, tI);
- t6 = _fjsp_sub_v2r8(t6, tK);
-
- t7 = _fjsp_sub_v2r8(t7, tB);
- t8 = _fjsp_sub_v2r8(t8, tD);
- t9 = _fjsp_sub_v2r8(t9, tF);
- t10 = _fjsp_sub_v2r8(t10, tH);
- t11 = _fjsp_sub_v2r8(t11, tJ);
- t12 = _fjsp_sub_v2r8(t12, tL);
-
- _fjsp_storel_v2r8(ptrA, t1);
- _fjsp_storeh_v2r8(ptrA+1, t1);
- _fjsp_storel_v2r8(ptrA+2, t2);
- _fjsp_storeh_v2r8(ptrA+3, t2);
- _fjsp_storel_v2r8(ptrA+4, t3);
- _fjsp_storeh_v2r8(ptrA+5, t3);
- _fjsp_storel_v2r8(ptrA+6, t4);
- _fjsp_storeh_v2r8(ptrA+7, t4);
- _fjsp_storel_v2r8(ptrA+8, t5);
- _fjsp_storeh_v2r8(ptrA+9, t5);
- _fjsp_storel_v2r8(ptrA+10, t6);
- _fjsp_storeh_v2r8(ptrA+11, t6);
- _fjsp_storel_v2r8(ptrB, t7);
- _fjsp_storeh_v2r8(ptrB+1, t7);
- _fjsp_storel_v2r8(ptrB+2, t8);
- _fjsp_storeh_v2r8(ptrB+3, t8);
- _fjsp_storel_v2r8(ptrB+4, t9);
- _fjsp_storeh_v2r8(ptrB+5, t9);
- _fjsp_storel_v2r8(ptrB+6, t10);
- _fjsp_storeh_v2r8(ptrB+7, t10);
- _fjsp_storel_v2r8(ptrB+8, t11);
- _fjsp_storeh_v2r8(ptrB+9, t11);
- _fjsp_storel_v2r8(ptrB+10, t12);
- _fjsp_storeh_v2r8(ptrB+11, t12);
-}
-
-
-
-static gmx_inline void
-gmx_fjsp_update_iforce_1atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1,
- double * gmx_restrict fptr,
- double * gmx_restrict fshiftptr)
-{
- __m128d t1, t2, t3, t4;
-
- /* transpose data */
- t1 = fix1;
- fix1 = _fjsp_unpacklo_v2r8(fix1, fiy1); /* y0 x0 */
- fiy1 = _fjsp_unpackhi_v2r8(t1, fiy1); /* y1 x1 */
-
- fix1 = _fjsp_add_v2r8(fix1, fiy1);
- fiz1 = _fjsp_add_v2r8( fiz1, _fjsp_unpackhi_v2r8(fiz1, fiz1 ));
-
- t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 );
- _fjsp_storel_v2r8( fptr, t4 );
- _fjsp_storeh_v2r8( fptr+1, t4 );
- _fjsp_storel_v2r8( fptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fptr+2), fiz1 ));
-
- t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 );
- _fjsp_storel_v2r8( fshiftptr, t4 );
- _fjsp_storeh_v2r8( fshiftptr+1, t4 );
- _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fshiftptr+2), fiz1 ));
-}
-
-static gmx_inline void
-gmx_fjsp_update_iforce_3atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1,
- _fjsp_v2r8 fix2, _fjsp_v2r8 fiy2, _fjsp_v2r8 fiz2,
- _fjsp_v2r8 fix3, _fjsp_v2r8 fiy3, _fjsp_v2r8 fiz3,
- double * gmx_restrict fptr,
- double * gmx_restrict fshiftptr)
-{
- __m128d t1, t2, t3, t4, t5, t6;
-
- /* transpose data */
- GMX_FJSP_TRANSPOSE2_V2R8(fix1, fiy1);
- GMX_FJSP_TRANSPOSE2_V2R8(fiz1, fix2);
- GMX_FJSP_TRANSPOSE2_V2R8(fiy2, fiz2);
- t1 = fix3;
- fix3 = _fjsp_unpacklo_v2r8(fix3, fiy3); /* y0 x0 */
- fiy3 = _fjsp_unpackhi_v2r8(t1, fiy3); /* y1 x1 */
-
- fix1 = _fjsp_add_v2r8(fix1, fiy1);
- fiz1 = _fjsp_add_v2r8(fiz1, fix2);
- fiy2 = _fjsp_add_v2r8(fiy2, fiz2);
-
- fix3 = _fjsp_add_v2r8(fix3, fiy3);
- fiz3 = _fjsp_add_v2r8( fiz3, _fjsp_unpackhi_v2r8(fiz3, fiz3));
-
- t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 );
- t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+2), fiz1 );
- t5 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+4), fiy2 );
- t6 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+6), fix3 );
-
- _fjsp_storel_v2r8( fptr, t3 );
- _fjsp_storeh_v2r8( fptr+1, t3 );
- _fjsp_storel_v2r8( fptr+2, t4 );
- _fjsp_storeh_v2r8( fptr+3, t4 );
- _fjsp_storel_v2r8( fptr+4, t5 );
- _fjsp_storeh_v2r8( fptr+5, t5 );
- _fjsp_storel_v2r8( fptr+6, t6 );
- _fjsp_storeh_v2r8( fptr+7, t6 );
- _fjsp_storel_v2r8( fptr+8, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fptr+8), fiz3 ));
-
- fix1 = _fjsp_add_v2r8(fix1, fix3);
- t1 = _fjsp_shuffle_v2r8(fiz1, fiy2, GMX_FJSP_SHUFFLE2(0, 1));
- fix1 = _fjsp_add_v2r8(fix1, t1); /* x and y sums */
-
- t2 = _fjsp_shuffle_v2r8(fiy2, fiy2, GMX_FJSP_SHUFFLE2(1, 1));
- fiz1 = _fjsp_add_v2r8(fiz1, fiz3);
- fiz1 = _fjsp_add_v2r8(fiz1, t2); /* z sum */
-
- t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 );
- _fjsp_storel_v2r8( fshiftptr, t3 );
- _fjsp_storeh_v2r8( fshiftptr+1, t3 );
- _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fshiftptr+2), fiz1 ));
-}
-
-
-static gmx_inline void
-gmx_fjsp_update_iforce_4atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1,
- _fjsp_v2r8 fix2, _fjsp_v2r8 fiy2, _fjsp_v2r8 fiz2,
- _fjsp_v2r8 fix3, _fjsp_v2r8 fiy3, _fjsp_v2r8 fiz3,
- _fjsp_v2r8 fix4, _fjsp_v2r8 fiy4, _fjsp_v2r8 fiz4,
- double * gmx_restrict fptr,
- double * gmx_restrict fshiftptr)
-{
- __m128d t1, t2, t3, t4, t5, t6, t7, t8;
-
- /* transpose data */
- GMX_FJSP_TRANSPOSE2_V2R8(fix1, fiy1);
- GMX_FJSP_TRANSPOSE2_V2R8(fiz1, fix2);
- GMX_FJSP_TRANSPOSE2_V2R8(fiy2, fiz2);
- GMX_FJSP_TRANSPOSE2_V2R8(fix3, fiy3);
- GMX_FJSP_TRANSPOSE2_V2R8(fiz3, fix4);
- GMX_FJSP_TRANSPOSE2_V2R8(fiy4, fiz4);
-
- fix1 = _fjsp_add_v2r8(fix1, fiy1);
- fiz1 = _fjsp_add_v2r8(fiz1, fix2);
- fiy2 = _fjsp_add_v2r8(fiy2, fiz2);
- fix3 = _fjsp_add_v2r8(fix3, fiy3);
- fiz3 = _fjsp_add_v2r8(fiz3, fix4);
- fiy4 = _fjsp_add_v2r8(fiy4, fiz4);
-
- t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 );
- t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+2), fiz1 );
- t5 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+4), fiy2 );
- t6 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+6), fix3 );
- t7 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+8), fiz3 );
- t8 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+10), fiy4 );
- _fjsp_storel_v2r8( fptr, t3 );
- _fjsp_storeh_v2r8( fptr+1, t3 );
- _fjsp_storel_v2r8( fptr+2, t4 );
- _fjsp_storeh_v2r8( fptr+3, t4 );
- _fjsp_storel_v2r8( fptr+4, t5 );
- _fjsp_storeh_v2r8( fptr+5, t5 );
- _fjsp_storel_v2r8( fptr+6, t6 );
- _fjsp_storeh_v2r8( fptr+7, t6 );
- _fjsp_storel_v2r8( fptr+8, t7 );
- _fjsp_storeh_v2r8( fptr+9, t7 );
- _fjsp_storel_v2r8( fptr+10, t8 );
- _fjsp_storeh_v2r8( fptr+11, t8 );
-
- t1 = _fjsp_shuffle_v2r8(fiz1, fiy2, GMX_FJSP_SHUFFLE2(0, 1));
- fix1 = _fjsp_add_v2r8(fix1, t1);
- t2 = _fjsp_shuffle_v2r8(fiz3, fiy4, GMX_FJSP_SHUFFLE2(0, 1));
- fix3 = _fjsp_add_v2r8(fix3, t2);
- fix1 = _fjsp_add_v2r8(fix1, fix3); /* x and y sums */
-
- fiz1 = _fjsp_add_v2r8(fiz1, _fjsp_unpackhi_v2r8(fiy2, fiy2));
- fiz3 = _fjsp_add_v2r8(fiz3, _fjsp_unpackhi_v2r8(fiy4, fiy4));
- fiz1 = _fjsp_add_v2r8(fiz1, fiz3); /* z sum */
-
- t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 );
- _fjsp_storel_v2r8( fshiftptr, t3 );
- _fjsp_storeh_v2r8( fshiftptr+1, t3 );
- _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fshiftptr+2), fiz1 ));
-}
-
-
-
-static gmx_inline void
-gmx_fjsp_update_1pot_v2r8(_fjsp_v2r8 pot1, double * gmx_restrict ptrA)
-{
- pot1 = _fjsp_add_v2r8(pot1, _fjsp_unpackhi_v2r8(pot1, pot1));
- _fjsp_storel_v2r8(ptrA, _fjsp_add_v2r8(pot1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA)));
-}
-
-static gmx_inline void
-gmx_fjsp_update_2pot_v2r8(_fjsp_v2r8 pot1, double * gmx_restrict ptrA,
- _fjsp_v2r8 pot2, double * gmx_restrict ptrB)
-{
- GMX_FJSP_TRANSPOSE2_V2R8(pot1, pot2);
- pot1 = _fjsp_add_v2r8(pot1, pot2);
- pot2 = _fjsp_unpackhi_v2r8(pot1, pot1);
-
- _fjsp_storel_v2r8(ptrA, _fjsp_add_v2r8(pot1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA)));
- _fjsp_storel_v2r8(ptrB, _fjsp_add_v2r8(pot2, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB)));
-}
-
-
-#endif /* _kernelutil_sparc64_hpc_ace_double_h_ */
+++ /dev/null
-#!/usr/bin/env python2
-#
-# This file is part of the GROMACS molecular simulation package.
-#
-# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
-# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
-# and including many others, as listed in the AUTHORS file in the
-# top-level source directory and at http://www.gromacs.org.
-#
-# GROMACS is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public License
-# as published by the Free Software Foundation; either version 2.1
-# of the License, or (at your option) any later version.
-#
-# GROMACS is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with GROMACS; if not, see
-# http://www.gnu.org/licenses, or write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# If you want to redistribute modifications to GROMACS, please
-# consider that scientific software is very special. Version
-# control is crucial - bugs must be traceable. We will be happy to
-# consider code for inclusion in the official distribution, but
-# derived work must not be called official GROMACS. Details are found
-# in the README & COPYING files - if they are missing, get the
-# official version at http://www.gromacs.org.
-#
-# To help us fund GROMACS development, we humbly ask that you cite
-# the research papers on the package. Check out http://www.gromacs.org.
-
-import sys
-import os
-sys.path.append("../preprocessor")
-sys.path.append("../../../../../admin")
-from copyright import create_copyright_header
-from gmxpreprocess import gmxpreprocess
-
-# "The happiest programs are programs that write other programs."
-#
-#
-# This script controls the generation of Gromacs nonbonded kernels.
-#
-# We no longer generate kernels on-the-fly, so this file is not run
-# during a Gromacs compile - only when we need to update the kernels (=rarely).
-#
-# To maximize performance, each combination of interactions in Gromacs
-# has a separate nonbonded kernel without conditionals in the code.
-# To avoid writing hundreds of different routines for each architecture,
-# we instead use a custom preprocessor so we can encode the conditionals
-# and expand for-loops (e.g, for water-water interactions)
-# from a general kernel template. While that file will contain quite a
-# few preprocessor directives, it is still an order of magnitude easier
-# to maintain than ~200 different kernels (not to mention it avoids bugs).
-#
-# To actually generate the kernels, this program iteratively calls the
-# preprocessor with different define settings corresponding to all
-# combinations of coulomb/van-der-Waals/geometry options.
-#
-# A main goal in the design was to make this new generator _general_. For
-# this reason we have used a lot of different fields to identify a particular
-# kernel and interaction. Basically, each kernel will have a name like
-#
-# nbkernel_ElecXX_VdwYY_GeomZZ_VF_QQ()
-#
-# Where XX/YY/ZZ/VF are strings to identify what the kernel computes.
-#
-# Elec/Vdw describe the type of interaction for electrostatics and van der Waals.
-# The geometry settings correspond e.g. to water-water or water-particle kernels,
-# and finally the VF setting is V,F,or VF depending on whether we calculate
-# only the potential, only the force, or both of them. The final string (QQ)
-# is the architecture/language/optimization of the kernel.
-#
-Arch = 'sparc64_hpc_ace_double'
-
-# Explanation of the 'properties':
-#
-# It is cheap to compute r^2, and the kernels require various other functions of r for
-# different kinds of interaction. Depending on the needs of the kernel and the available
-# processor instructions, this will be done in different ways.
-#
-# 'rinv' means we need 1/r, which is calculated as 1/sqrt(r^2).
-# 'rinvsq' means we need 1/(r*r). This is calculated as rinv*rinv if we already did rinv, otherwise 1/r^2.
-# 'r' is similarly calculated as r^2*rinv when needed
-# 'table' means the interaction is tabulated, in which case we will calculate a table index before the interaction
-# 'shift' means the interaction will be modified by a constant to make it zero at the cutoff.
-# 'cutoff' means the interaction is set to 0.0 outside the cutoff
-#
-
-FileHeader = create_copyright_header('2012,2013,2014,2015,2017,2018')
-FileHeader += """/*
- * Note: this file was generated by the GROMACS """+Arch+""" kernel generator.
- */
-"""
-
-###############################################
-# ELECTROSTATICS
-# Interactions and flags for them
-###############################################
-ElectrostaticsList = {
- 'None' : [],
- 'Coulomb' : ['rinv','rinvsq'],
- 'ReactionField' : ['rinv','rinvsq'],
- 'CubicSplineTable' : ['rinv','r','table'],
- 'Ewald' : ['rinv','rinvsq','r'],
-}
-
-
-###############################################
-# VAN DER WAALS
-# Interactions and flags for them
-###############################################
-VdwList = {
- 'None' : [],
- 'LennardJones' : ['rinvsq'],
-# 'Buckingham' : ['rinv','rinvsq','r'], # Disabled for sse4.1 to reduce number of kernels and simply the template
- 'CubicSplineTable' : ['rinv','r','table'],
- 'LJEwald' : ['rinv','rinvsq','r'],
-}
-
-
-###############################################
-# MODIFIERS
-# Different ways to adjust/modify interactions to conserve energy
-###############################################
-ModifierList = {
- 'None' : [],
- 'ExactCutoff' : ['exactcutoff'], # Zero the interaction outside the cutoff, used for reaction-field-zero
- 'PotentialShift' : ['shift','exactcutoff'],
- 'PotentialSwitch' : ['rinv','r','switch','exactcutoff']
-}
-
-
-###############################################
-# GEOMETRY COMBINATIONS
-###############################################
-GeometryNameList = [
- [ 'Particle' , 'Particle' ],
- [ 'Water3' , 'Particle' ],
- [ 'Water3' , 'Water3' ],
- [ 'Water4' , 'Particle' ],
- [ 'Water4' , 'Water4' ]
-]
-
-
-###############################################
-# POTENTIAL / FORCE
-###############################################
-VFList = [
- 'PotentialAndForce',
-# 'Potential', # Not used yet
- 'Force'
-]
-
-
-###############################################
-# GEOMETRY PROPERTIES
-###############################################
-# Dictionaries with lists telling which interactions are present
-# 1,2,3 means particles 1,2,3 (but not 0) have electrostatics!
-GeometryElectrostatics = {
- 'Particle' : [ 0 ],
- 'Particle2' : [ 0 , 1 ],
- 'Particle3' : [ 0 , 1 , 2 ],
- 'Particle4' : [ 0 , 1 , 2 , 3 ],
- 'Water3' : [ 0 , 1 , 2 ],
- 'Water4' : [ 1 , 2 , 3 ]
-}
-
-GeometryVdw = {
- 'Particle' : [ 0 ],
- 'Particle2' : [ 0 , 1 ],
- 'Particle3' : [ 0 , 1 , 2 ],
- 'Particle4' : [ 0 , 1 , 2 , 3 ],
- 'Water3' : [ 0 ],
- 'Water4' : [ 0 ]
-}
-
-
-
-
-# Dictionary to abbreviate all strings (mixed from all the lists)
-Abbreviation = {
- 'None' : 'None',
- 'Coulomb' : 'Coul',
- 'Ewald' : 'Ew',
- 'ReactionField' : 'RF',
- 'CubicSplineTable' : 'CSTab',
- 'LennardJones' : 'LJ',
- 'Buckingham' : 'Bham',
- 'LJEwald' : 'LJEw',
- 'PotentialShift' : 'Sh',
- 'PotentialSwitch' : 'Sw',
- 'ExactCutoff' : 'Cut',
- 'PotentialAndForce' : 'VF',
- 'Potential' : 'V',
- 'Force' : 'F',
- 'Water3' : 'W3',
- 'Water4' : 'W4',
- 'Particle' : 'P1',
- 'Particle2' : 'P2',
- 'Particle3' : 'P3',
- 'Particle4' : 'P4'
-}
-
-
-###############################################
-# Functions
-###############################################
-
-# Return a string with the kernel name from current settings
-def MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom):
- ElecStr = 'Elec' + Abbreviation[KernelElec]
- if(KernelElecMod!='None'):
- ElecStr = ElecStr + Abbreviation[KernelElecMod]
- VdwStr = 'Vdw' + Abbreviation[KernelVdw]
- if(KernelVdwMod!='None'):
- VdwStr = VdwStr + Abbreviation[KernelVdwMod]
- GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]]
- return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + Arch
-
-def MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
- ElecStr = 'Elec' + Abbreviation[KernelElec]
- if(KernelElecMod!='None'):
- ElecStr = ElecStr + Abbreviation[KernelElecMod]
- VdwStr = 'Vdw' + Abbreviation[KernelVdw]
- if(KernelVdwMod!='None'):
- VdwStr = VdwStr + Abbreviation[KernelVdwMod]
- GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]]
- VFStr = Abbreviation[KernelVF]
- return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + VFStr + '_' + Arch
-
-# Return a string with a declaration to use for the kernel;
-# this will be a sequence of string combinations as well as the actual function name
-# Dont worry about field widths - that is just pretty-printing for the header!
-def MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF):
- KernelStr = '\"'+KernelName+'\"'
- ArchStr = '\"'+Arch+'\"'
- ElecStr = '\"'+KernelElec+'\"'
- ElecModStr = '\"'+KernelElecMod+'\"'
- VdwStr = '\"'+KernelVdw+'\"'
- VdwModStr = '\"'+KernelVdwMod+'\"'
- GeomStr = '\"'+KernelGeom[0]+KernelGeom[1]+'\"'
- OtherStr = '\"'+KernelOther+'\"'
- VFStr = '\"'+KernelVF+'\"'
-
- ThisSpec = ArchStr+', '+ElecStr+', '+ElecModStr+', '+VdwStr+', '+VdwModStr+', '+GeomStr+', '+OtherStr+', '+VFStr
- ThisDecl = ' { '+KernelName+', '+KernelStr+', '+ThisSpec+' }'
- return ThisDecl
-
-
-# Returns 1 if this kernel should be created, 0 if we should skip it
-# This routine is not critical - it is not the end of the world if we create more kernels,
-# but since the number is pretty large we save both space and compile-time by reducing it a bit.
-def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
-
- # No need for kernels without interactions
- if(KernelElec=='None' and KernelVdw=='None'):
- return 0
-
- # No need for modifiers without interactions
- if((KernelElec=='None' and KernelElecMod!='None') or (KernelVdw=='None' and KernelVdwMod!='None')):
- return 0
-
- # No need for LJ-only water optimization, or water optimization with implicit solvent.
- if('Water' in KernelGeom[0] and KernelElec=='None'):
- return 0
-
- # Non-matching table settings are pointless
- if( ('Table' in KernelElec) and ('Table' in KernelVdw) and KernelElec!=KernelVdw ):
- return 0
-
- # Try to reduce the number of different switch/shift options to get a reasonable number of kernels
- # For electrostatics, reaction-field can use 'exactcutoff', and ewald can use switch or shift.
- if(KernelElecMod=='ExactCutoff' and KernelElec!='ReactionField'):
- return 0
- if(KernelElecMod in ['PotentialShift','PotentialSwitch'] and KernelElec!='Ewald'):
- return 0
- # For Vdw, we support switch and shift for Lennard-Jones/Buckingham
- if((KernelVdwMod=='ExactCutoff') or
- (KernelVdwMod in ['PotentialShift','PotentialSwitch'] and KernelVdw not in ['LennardJones','Buckingham','LJEwald'])):
- return 0
-
- # For LJEwald, we only support shift
- if(KernelVdw=='LJEwald' and KernelVdwMod=='PotentialSwitch'):
- return 0
-
- # Choose either switch or shift and don't mix them...
- if((KernelElecMod=='PotentialShift' and KernelVdwMod=='PotentialSwitch') or
- (KernelElecMod=='PotentialSwitch' and KernelVdwMod=='PotentialShift')):
- return 0
-
- # Don't use a Vdw kernel with a modifier if the electrostatics one does not have one
- if(KernelElec!='None' and KernelElecMod=='None' and KernelVdwMod!='None'):
- return 0
-
- # Don't use an electrostatics kernel with a modifier if the vdw one does not have one,
- # unless the electrostatics one is reaction-field with exact cutoff.
- if(KernelVdw!='None' and KernelVdwMod=='None' and KernelElecMod!='None'):
- if(KernelElec=='ReactionField' and KernelVdw!='CubicSplineTable'):
- return 0
- elif(KernelElec!='ReactionField'):
- return 0
-
- #Only do LJ-PME if we are also doing PME for electrostatics, or no electrostatics at all.
- if(KernelVdw=='LJEwald' and KernelElec not in ['Ewald','None']):
- return 0
-
- return 1
-
-
-
-#
-# The preprocessor will automatically expand the interactions for water and other
-# geometries inside the kernel, but to get this right we need to setup a couple
-# of defines - we do them in a separate routine to keep the main loop clean.
-#
-# While this routine might look a bit complex it is actually quite straightforward,
-# and the best news is that you wont have to modify _anything_ for a new geometry
-# as long as you correctly define its Electrostatics/Vdw geometry in the lists above!
-#
-def SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines):
- # What is the _name_ for the i/j group geometry?
- igeometry = KernelGeom[0]
- jgeometry = KernelGeom[1]
- # define so we can access it in the source when the preprocessor runs
- defines['GEOMETRY_I'] = igeometry
- defines['GEOMETRY_J'] = jgeometry
-
- # For the i/j groups, extract a python list of which sites have electrostatics
- # For SPC/TIP3p this will be [1,1,1], while TIP4p (no elec on first site) will be [0,1,1,1]
- ielec = GeometryElectrostatics[igeometry]
- jelec = GeometryElectrostatics[jgeometry]
- # Zero out the corresponding lists in case we dont do Elec
- if(KernelElec=='None'):
- ielec = []
- jelec = []
-
- # Extract similar interaction lists for Vdw interactions (example for SPC: [1,0,0])
- iVdw = GeometryVdw[igeometry]
- jVdw = GeometryVdw[jgeometry]
-
- # Zero out the corresponding lists in case we dont do Vdw
- if(KernelVdw=='None'):
- iVdw = []
- jVdw = []
-
- # iany[] and jany[] contains lists of the particles actually used (for interactions) in this kernel
- iany = list(set(ielec+iVdw)) # convert to+from set to make elements unique
- jany = list(set(jelec+jVdw))
-
- defines['PARTICLES_ELEC_I'] = ielec
- defines['PARTICLES_ELEC_J'] = jelec
- defines['PARTICLES_VDW_I'] = iVdw
- defines['PARTICLES_VDW_J'] = jVdw
- defines['PARTICLES_I'] = iany
- defines['PARTICLES_J'] = jany
-
- # elecij,Vdwij are sets with pairs of particles for which the corresponding interaction is done
- # (and anyij again corresponds to either electrostatics or Vdw)
- elecij = []
- Vdwij = []
- anyij = []
-
- for i in ielec:
- for j in jelec:
- elecij.append([i,j])
-
- for i in iVdw:
- for j in jVdw:
- Vdwij.append([i,j])
-
- for i in iany:
- for j in jany:
- if [i,j] in elecij or [i,j] in Vdwij:
- anyij.append([i,j])
-
- defines['PAIRS_IJ'] = anyij
-
- # Make an 2d list-of-distance-properties-to-calculate for i,j
- ni = max(iany)+1
- nj = max(jany)+1
- # Each element properties[i][j] is an empty list
- properties = [ [ [] for j in range(0,nj) ] for i in range (0,ni) ]
- # Add properties to each set
- for i in range(0,ni):
- for j in range(0,nj):
- if [i,j] in elecij:
- properties[i][j] = properties[i][j] + ['electrostatics'] + ElectrostaticsList[KernelElec] + ModifierList[KernelElecMod]
- if [i,j] in Vdwij:
- properties[i][j] = properties[i][j] + ['vdw'] + VdwList[KernelVdw] + ModifierList[KernelVdwMod]
- # Add rinv if we need r
- if 'r' in properties[i][j]:
- properties[i][j] = properties[i][j] + ['rinv']
- # Add rsq if we need rinv or rinsq
- if 'rinv' in properties[i][j] or 'rinvsq' in properties[i][j]:
- properties[i][j] = properties[i][j] + ['rsq']
-
- defines['INTERACTION_FLAGS'] = properties
-
-
-
-def PrintStatistics(ratio):
- ratio = 100.0*ratio
- print '\rGenerating %s nonbonded kernels... %5.1f%%' % (Arch,ratio),
- sys.stdout.flush()
-
-
-
-defines = {}
-kerneldecl = []
-
-cnt = 0.0
-nelec = len(ElectrostaticsList)
-nVdw = len(VdwList)
-nmod = len(ModifierList)
-ngeom = len(GeometryNameList)
-
-ntot = nelec*nmod*nVdw*nmod*ngeom
-
-numKernels = 0
-
-fpdecl = open('nb_kernel_' + Arch + '.cpp','w')
-fpdecl.write( FileHeader )
-fpdecl.write( '#include "gmxpre.h"\n\n' )
-fpdecl.write( '#include "gromacs/gmxlib/nonbonded/nb_kernel.h"\n\n' )
-
-for KernelElec in ElectrostaticsList:
- defines['KERNEL_ELEC'] = KernelElec
-
- for KernelElecMod in ModifierList:
- defines['KERNEL_MOD_ELEC'] = KernelElecMod
-
- for KernelVdw in VdwList:
- defines['KERNEL_VDW'] = KernelVdw
-
- for KernelVdwMod in ModifierList:
- defines['KERNEL_MOD_VDW'] = KernelVdwMod
-
- for KernelGeom in GeometryNameList:
-
- cnt += 1
- KernelFilename = MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom) + '.cpp'
- fpkernel = open(KernelFilename,'w')
- defines['INCLUDE_HEADER'] = 1 # Include header first time in new file
- DoHeader = 1
-
- for KernelVF in VFList:
-
- KernelName = MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF)
-
- defines['KERNEL_NAME'] = KernelName
- defines['KERNEL_VF'] = KernelVF
-
- # Check if this is a valid/sane/usable combination
- if not KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
- continue;
-
- # The overall kernel settings determine what the _kernel_ calculates, but for the water
- # kernels this does not mean that every pairwise interaction has e.g. Vdw interactions.
- # This routine sets defines of what to calculate for each pair of particles in those cases.
- SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines)
-
- if(DoHeader==1):
- fpkernel.write( FileHeader )
-
- gmxpreprocess('nb_kernel_template_' + Arch + '.pre', KernelName+'.tmp' , defines, force=1,contentType='C')
- numKernels = numKernels + 1
-
- defines['INCLUDE_HEADER'] = 0 # Header has been included once now
- DoHeader=0
-
- # Append temp file contents to the common kernelfile
- fptmp = open(KernelName+'.tmp','r')
- fpkernel.writelines(fptmp.readlines())
- fptmp.close()
- os.remove(KernelName+'.tmp')
-
- # Add an extern declaration for this kernel
- fpdecl.write('extern nb_kernel_t ' + KernelName + ';\n');
-
- # Add declaration to the buffer
- KernelOther=''
- kerneldecl.append(MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF))
-
- filesize = fpkernel.tell()
- fpkernel.close()
- if(filesize==0):
- os.remove(KernelFilename)
-
- PrintStatistics(cnt/ntot)
- pass
- pass
- pass
- pass
-pass
-
-# Write out the list of settings and corresponding kernels to the declaration file
-fpdecl.write( '\n\n' )
-fpdecl.write( 'nb_kernel_info_t\n' )
-fpdecl.write( ' kernellist_'+Arch+'[] =\n' )
-fpdecl.write( '{\n' )
-for decl in kerneldecl[0:-1]:
- fpdecl.write( decl + ',\n' )
-fpdecl.write( kerneldecl[-1] + '\n' )
-fpdecl.write( '};\n\n' )
-fpdecl.write( 'int\n' )
-fpdecl.write( ' kernellist_'+Arch+'_size = sizeof(kernellist_'+Arch+')/sizeof(kernellist_'+Arch+'[0]);\n')
-fpdecl.close()
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 76 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 76 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*76);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 64 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 64 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 171 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 171 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*171);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 151 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 151 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*151);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq01,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq02,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 444 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq01,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq02,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 444 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*444);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 400 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 400 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*400);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq30,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 200 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq30,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 200 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*200);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 180 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 180 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*180);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq13,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq23,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq31,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq32,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq33,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 476 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq13,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq23,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq31,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq32,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq33,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 476 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*476);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 432 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 432 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*432);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 59 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 59 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*59);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 50 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 50 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*50);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 154 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 154 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*154);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 137 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 137 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*137);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq01,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq02,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 427 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq01,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq02,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 427 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*427);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 386 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 386 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*386);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq30,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 176 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq30,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 176 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*176);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 159 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 159 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*159);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq13,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq23,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq31,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq32,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq33,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 452 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq13,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq23,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq31,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq32,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq33,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 452 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*452);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 411 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 411 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*411);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 46 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 46 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 8 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*46);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 42 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 42 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*42);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 141 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 141 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*141);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 129 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 129 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*129);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq01,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq02,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 414 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq00,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq01,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq02,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 414 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*414);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 378 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,FF),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r01,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,FF),_fjsp_mul_v2r8(vftabscale,rinv01)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r02,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,FF),_fjsp_mul_v2r8(vftabscale,rinv02)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 378 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*378);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq30,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 141 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq10,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq20,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq30,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 141 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*141);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 129 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r10,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,FF),_fjsp_mul_v2r8(vftabscale,rinv10)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r20,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,FF),_fjsp_mul_v2r8(vftabscale,rinv20)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r30,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,FF),_fjsp_mul_v2r8(vftabscale,rinv30)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 129 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*129);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq13,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq23,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq31,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq32,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq33,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 414 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq11,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq12,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq13,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq21,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq22,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq23,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq31,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq32,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq33,VV);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 414 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*414);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: CubicSplineTable
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 378 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r11,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,FF),_fjsp_mul_v2r8(vftabscale,rinv11)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r12,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,FF),_fjsp_mul_v2r8(vftabscale,rinv12)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r13,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,FF),_fjsp_mul_v2r8(vftabscale,rinv13)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r21,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,FF),_fjsp_mul_v2r8(vftabscale,rinv21)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r22,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,FF),_fjsp_mul_v2r8(vftabscale,rinv22)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r23,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,FF),_fjsp_mul_v2r8(vftabscale,rinv23)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r31,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,FF),_fjsp_mul_v2r8(vftabscale,rinv31)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r32,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,FF),_fjsp_mul_v2r8(vftabscale,rinv32)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r33,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,FF),_fjsp_mul_v2r8(vftabscale,rinv33)));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 378 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*378);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 66 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 66 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*66);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 57 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 57 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*57);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 131 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 131 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*131);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*120);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 314 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 314 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*314);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 297 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 297 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*297);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 155 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 155 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*155);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 144 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 144 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*144);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 341 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 341 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*341);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 324 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 324 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*324);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 43 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 43 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*43);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 37 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 37 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*37);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 108 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 108 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*108);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 100 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 100 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*100);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 291 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 291 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*291);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 277 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 277 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*277);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 131 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 131 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*131);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 123 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 123 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*123);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 317 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 317 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*317);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 303 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 303 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*303);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 31 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 31 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 8 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*31);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 30 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 30 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*30);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 96 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 96 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*96);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 93 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 93 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*93);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 279 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 279 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*279);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 270 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,rinv00);
- felec = _fjsp_mul_v2r8(velec,rinvsq00);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,rinv01);
- felec = _fjsp_mul_v2r8(velec,rinvsq01);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,rinv02);
- felec = _fjsp_mul_v2r8(velec,rinvsq02);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 270 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*270);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 96 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 96 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*96);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 93 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,rinv10);
- felec = _fjsp_mul_v2r8(velec,rinvsq10);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,rinv20);
- felec = _fjsp_mul_v2r8(velec,rinvsq20);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,rinv30);
- felec = _fjsp_mul_v2r8(velec,rinvsq30);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 93 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*93);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 279 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 279 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*279);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Coulomb
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 270 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,rinv11);
- felec = _fjsp_mul_v2r8(velec,rinvsq11);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,rinv12);
- felec = _fjsp_mul_v2r8(velec,rinvsq12);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,rinv13);
- felec = _fjsp_mul_v2r8(velec,rinvsq13);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,rinv21);
- felec = _fjsp_mul_v2r8(velec,rinvsq21);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,rinv22);
- felec = _fjsp_mul_v2r8(velec,rinvsq22);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,rinv23);
- felec = _fjsp_mul_v2r8(velec,rinvsq23);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,rinv31);
- felec = _fjsp_mul_v2r8(velec,rinvsq31);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,rinv32);
- felec = _fjsp_mul_v2r8(velec,rinvsq32);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,rinv33);
- felec = _fjsp_mul_v2r8(velec,rinvsq33);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 270 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*270);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 79 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 79 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*79);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 64 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 64 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_20;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 180 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 180 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*180);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_20;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 151 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 151 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*151);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_01;
- _fjsp_v2r8 c6grid_02;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_11;
- _fjsp_v2r8 c6grid_12;
- _fjsp_v2r8 c6grid_20;
- _fjsp_v2r8 c6grid_21;
- _fjsp_v2r8 c6grid_22;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 471 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 471 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*471);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_01;
- _fjsp_v2r8 c6grid_02;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_11;
- _fjsp_v2r8 c6grid_12;
- _fjsp_v2r8 c6grid_20;
- _fjsp_v2r8 c6grid_21;
- _fjsp_v2r8 c6grid_22;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 400 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 400 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*400);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_20;
- _fjsp_v2r8 c6grid_30;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 209 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 209 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*209);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_20;
- _fjsp_v2r8 c6grid_30;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 180 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 180 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*180);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_11;
- _fjsp_v2r8 c6grid_12;
- _fjsp_v2r8 c6grid_13;
- _fjsp_v2r8 c6grid_21;
- _fjsp_v2r8 c6grid_22;
- _fjsp_v2r8 c6grid_23;
- _fjsp_v2r8 c6grid_31;
- _fjsp_v2r8 c6grid_32;
- _fjsp_v2r8 c6grid_33;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 503 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 503 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*503);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_11;
- _fjsp_v2r8 c6grid_12;
- _fjsp_v2r8 c6grid_13;
- _fjsp_v2r8 c6grid_21;
- _fjsp_v2r8 c6grid_22;
- _fjsp_v2r8 c6grid_23;
- _fjsp_v2r8 c6grid_31;
- _fjsp_v2r8 c6grid_32;
- _fjsp_v2r8 c6grid_33;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 432 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 432 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*432);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 67 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 67 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*67);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 49 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 49 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*49);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 168 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 168 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*168);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 136 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 136 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*136);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 459 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 459 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*459);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 385 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 385 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*385);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 194 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 194 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*194);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 162 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 162 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*162);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 488 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 488 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*488);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 414 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 414 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*414);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 49 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 49 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 8 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*49);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 42 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 42 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*42);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 150 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 150 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*150);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 129 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 129 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*129);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 441 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv00,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv01,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv02,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 441 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*441);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 378 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 378 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*378);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 150 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv10,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv20,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv30,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 150 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*150);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 129 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 129 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*129);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 441 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv11,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv12,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv13,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv21,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv22,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv23,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv31,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv32,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv33,sh_ewald),velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 441 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*441);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 378 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 378 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*378);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 86 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 86 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*86);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 80 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 80 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*80);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 225 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 225 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*225);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 213 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 213 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*213);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- d = _fjsp_sub_v2r8(r01,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- d = _fjsp_sub_v2r8(r02,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 630 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- d = _fjsp_sub_v2r8(r01,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- d = _fjsp_sub_v2r8(r02,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 630 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*630);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- d = _fjsp_sub_v2r8(r01,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- d = _fjsp_sub_v2r8(r02,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 600 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- d = _fjsp_sub_v2r8(r01,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- d = _fjsp_sub_v2r8(r02,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 600 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*600);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- d = _fjsp_sub_v2r8(r30,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 269 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- d = _fjsp_sub_v2r8(r30,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 269 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*269);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- d = _fjsp_sub_v2r8(r30,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 257 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- d = _fjsp_sub_v2r8(r30,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 257 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*257);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- d = _fjsp_sub_v2r8(r13,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- d = _fjsp_sub_v2r8(r23,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- d = _fjsp_sub_v2r8(r31,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- d = _fjsp_sub_v2r8(r32,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- d = _fjsp_sub_v2r8(r33,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 677 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- d = _fjsp_sub_v2r8(r13,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- d = _fjsp_sub_v2r8(r23,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- d = _fjsp_sub_v2r8(r31,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- d = _fjsp_sub_v2r8(r32,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- d = _fjsp_sub_v2r8(r33,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 677 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*677);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- d = _fjsp_sub_v2r8(r13,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- d = _fjsp_sub_v2r8(r23,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- d = _fjsp_sub_v2r8(r31,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- d = _fjsp_sub_v2r8(r32,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- d = _fjsp_sub_v2r8(r33,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 647 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- d = _fjsp_sub_v2r8(r13,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- d = _fjsp_sub_v2r8(r23,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- d = _fjsp_sub_v2r8(r31,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- d = _fjsp_sub_v2r8(r32,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- d = _fjsp_sub_v2r8(r33,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 647 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*647);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 68 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 68 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 8 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*68);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 65 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 65 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*65);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 207 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 207 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*207);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 198 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 198 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*198);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- d = _fjsp_sub_v2r8(r01,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- d = _fjsp_sub_v2r8(r02,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 612 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- d = _fjsp_sub_v2r8(r01,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- d = _fjsp_sub_v2r8(r02,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 612 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*612);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- d = _fjsp_sub_v2r8(r01,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- d = _fjsp_sub_v2r8(r02,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 585 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- d = _fjsp_sub_v2r8(r01,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv01,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- d = _fjsp_sub_v2r8(r02,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv02,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 585 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*585);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- d = _fjsp_sub_v2r8(r30,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 207 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- d = _fjsp_sub_v2r8(r30,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 207 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*207);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- d = _fjsp_sub_v2r8(r30,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 198 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- d = _fjsp_sub_v2r8(r10,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv10,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- d = _fjsp_sub_v2r8(r20,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv20,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- d = _fjsp_sub_v2r8(r30,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv30,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 198 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*198);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- d = _fjsp_sub_v2r8(r13,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- d = _fjsp_sub_v2r8(r23,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- d = _fjsp_sub_v2r8(r31,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- d = _fjsp_sub_v2r8(r32,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- d = _fjsp_sub_v2r8(r33,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 612 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- d = _fjsp_sub_v2r8(r13,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- d = _fjsp_sub_v2r8(r23,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- d = _fjsp_sub_v2r8(r31,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- d = _fjsp_sub_v2r8(r32,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- d = _fjsp_sub_v2r8(r33,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
- velec = _fjsp_mul_v2r8(velec,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 612 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*612);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- d = _fjsp_sub_v2r8(r13,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- d = _fjsp_sub_v2r8(r23,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- d = _fjsp_sub_v2r8(r31,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- d = _fjsp_sub_v2r8(r32,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- d = _fjsp_sub_v2r8(r33,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 585 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- d = _fjsp_sub_v2r8(r11,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv11,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- d = _fjsp_sub_v2r8(r12,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv12,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- d = _fjsp_sub_v2r8(r13,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv13,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- d = _fjsp_sub_v2r8(r21,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv21,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- d = _fjsp_sub_v2r8(r22,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv22,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- d = _fjsp_sub_v2r8(r23,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv23,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- d = _fjsp_sub_v2r8(r31,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv31,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- d = _fjsp_sub_v2r8(r32,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv32,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- d = _fjsp_sub_v2r8(r33,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv33,_fjsp_mul_v2r8(velec,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 585 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*585);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 78 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 78 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*78);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 65 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 65 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*65);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 169 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 169 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*169);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 146 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 146 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*146);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 430 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 430 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*430);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 377 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 377 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*377);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 194 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 194 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*194);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 171 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 171 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*171);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 458 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 458 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*458);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 405 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 405 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*405);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 68 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 68 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*68);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 61 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 61 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*61);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_20;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 159 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 159 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*159);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_20;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 142 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 142 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*142);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_01;
- _fjsp_v2r8 c6grid_02;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_11;
- _fjsp_v2r8 c6grid_12;
- _fjsp_v2r8 c6grid_20;
- _fjsp_v2r8 c6grid_21;
- _fjsp_v2r8 c6grid_22;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 420 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 420 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*420);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_01;
- _fjsp_v2r8 c6grid_02;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_11;
- _fjsp_v2r8 c6grid_12;
- _fjsp_v2r8 c6grid_20;
- _fjsp_v2r8 c6grid_21;
- _fjsp_v2r8 c6grid_22;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 373 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 373 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*373);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_20;
- _fjsp_v2r8 c6grid_30;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 185 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 185 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*185);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_10;
- _fjsp_v2r8 c6grid_20;
- _fjsp_v2r8 c6grid_30;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 168 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 168 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*168);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_11;
- _fjsp_v2r8 c6grid_12;
- _fjsp_v2r8 c6grid_13;
- _fjsp_v2r8 c6grid_21;
- _fjsp_v2r8 c6grid_22;
- _fjsp_v2r8 c6grid_23;
- _fjsp_v2r8 c6grid_31;
- _fjsp_v2r8 c6grid_32;
- _fjsp_v2r8 c6grid_33;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 449 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 449 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*449);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LJEwald
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- _fjsp_v2r8 c6grid_11;
- _fjsp_v2r8 c6grid_12;
- _fjsp_v2r8 c6grid_13;
- _fjsp_v2r8 c6grid_21;
- _fjsp_v2r8 c6grid_22;
- _fjsp_v2r8 c6grid_23;
- _fjsp_v2r8 c6grid_31;
- _fjsp_v2r8 c6grid_32;
- _fjsp_v2r8 c6grid_33;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 402 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 402 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*402);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 56 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 56 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*56);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 46 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 46 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*46);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 147 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 147 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*147);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 127 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 127 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*127);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 408 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 408 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*408);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 358 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 358 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*358);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 170 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 170 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*170);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 150 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 150 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*150);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 434 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 434 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*434);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 384 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 384 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*384);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 44 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 44 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 8 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*44);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 39 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 39 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*39);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 135 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 135 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*135);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*120);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 396 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(rinv00,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(rinv01,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(rinv02,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 396 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*396);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 351 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r00,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq00,rinv00),_fjsp_sub_v2r8(rinvsq00,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r01 = _fjsp_mul_v2r8(rsq01,rinv01);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r01,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq01,rinv01),_fjsp_sub_v2r8(rinvsq01,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r02 = _fjsp_mul_v2r8(rsq02,rinv02);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r02,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq02,rinv02),_fjsp_sub_v2r8(rinvsq02,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 351 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*351);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 135 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(rinv10,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(rinv20,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(rinv30,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 135 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*135);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r10 = _fjsp_mul_v2r8(rsq10,rinv10);
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r10,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq10,rinv10),_fjsp_sub_v2r8(rinvsq10,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r20 = _fjsp_mul_v2r8(rsq20,rinv20);
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r20,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq20,rinv20),_fjsp_sub_v2r8(rinvsq20,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r30 = _fjsp_mul_v2r8(rsq30,rinv30);
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r30,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq30,rinv30),_fjsp_sub_v2r8(rinvsq30,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*120);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 396 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(rinv11,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(rinv12,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(rinv13,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(rinv21,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(rinv22,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(rinv23,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(rinv31,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(rinv32,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- ewtabD = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- ewtabFn = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(rinv33,velec));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 396 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*396);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: Ewald
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
-
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 351 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r11 = _fjsp_mul_v2r8(rsq11,rinv11);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r11,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq11,rinv11),_fjsp_sub_v2r8(rinvsq11,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r12 = _fjsp_mul_v2r8(rsq12,rinv12);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r12,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq12,rinv12),_fjsp_sub_v2r8(rinvsq12,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r13 = _fjsp_mul_v2r8(rsq13,rinv13);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r13,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq13,rinv13),_fjsp_sub_v2r8(rinvsq13,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r21 = _fjsp_mul_v2r8(rsq21,rinv21);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r21,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq21,rinv21),_fjsp_sub_v2r8(rinvsq21,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r22 = _fjsp_mul_v2r8(rsq22,rinv22);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r22,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq22,rinv22),_fjsp_sub_v2r8(rinvsq22,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r23 = _fjsp_mul_v2r8(rsq23,rinv23);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r23,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq23,rinv23),_fjsp_sub_v2r8(rinvsq23,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r31 = _fjsp_mul_v2r8(rsq31,rinv31);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r31,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq31,rinv31),_fjsp_sub_v2r8(rinvsq31,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r32 = _fjsp_mul_v2r8(rsq32,rinv32);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r32,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq32,rinv32),_fjsp_sub_v2r8(rinvsq32,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r33 = _fjsp_mul_v2r8(rsq33,rinv33);
-
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r33,ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq33,rinv33),_fjsp_sub_v2r8(rinvsq33,felec));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 351 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*351);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 59 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 59 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*59);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 51 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 51 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 6 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*51);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: LJEwald
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- rcutoff_scalar = fr->ic->rvdw;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 59 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_00,sh_lj_ewald,_fjsp_mul_v2r8(c6_00,sh_vdw_invrcut6))),one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 59 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*59);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: LJEwald
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- rcutoff_scalar = fr->ic->rvdw;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 51 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 51 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 6 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*51);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: LJEwald
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 50 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 50 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*50);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: LJEwald
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 c6grid_00;
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 48 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- c6grid_00 = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
- vdwgridparam+vdwioffset0+vdwjidx0B);
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq00);
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,_fjsp_sub_v2r8(c6_00,f6A)),rinvsix,f6B),rinvsq00);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 48 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 6 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*48);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- rcutoff_scalar = fr->ic->rvdw;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 44 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 44 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*44);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- rcutoff_scalar = fr->ic->rvdw;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 33 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 33 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 6 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*33);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- rcutoff_scalar = fr->ic->rvdw;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 62 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 62 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*62);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- rcutoff_scalar = fr->ic->rvdw;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 59 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 59 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 6 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*59);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 35 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 35 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*35);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: None
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 30 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
-
- /* Load parameters for j particles */
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 30 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 6 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*6 + inneriter*30);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 75 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 75 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*75);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 60 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 60 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*60);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 156 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 156 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*156);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 129 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 129 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*129);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 387 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 387 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*387);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 324 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 324 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*324);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 179 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 179 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*179);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 153 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 153 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*153);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 413 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 413 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*413);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 351 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 351 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*351);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 57 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 57 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*57);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 40 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 40 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*40);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 138 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 138 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*138);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 109 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 109 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*109);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 369 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 369 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*369);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 304 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 304 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*304);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 164 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 164 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*164);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 135 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 135 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*135);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 398 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_00,_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_00,sh_vdw_invrcut6,vvdw6),one_sixth));
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 398 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*398);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 333 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 333 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*333);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 73 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 73 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*73);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 64 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 64 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*64);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 154 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 154 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*154);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 133 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 133 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*133);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 385 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 385 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*385);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 328 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 328 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*328);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 182 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 182 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*182);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 161 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 161 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*161);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 416 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 416 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*416);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 359 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- d = _fjsp_sub_v2r8(r00,rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
-
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
-
- /* Evaluate switch function */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv00,_fjsp_mul_v2r8(vvdw,dsw)) );
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = fvdw;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 359 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*359);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 39 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 39 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 8 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*39);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 33 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- }
-
- /* Inner loop uses 33 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*33);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*120);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 102 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 102 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*102);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 351 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 351 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*351);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 297 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq00,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq00,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq01,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq01,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq02,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq02,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 297 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*297);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*120);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 102 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq10,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq10,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq20,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq20,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq30,rcutoff2))
- {
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq30,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- }
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 102 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*102);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 351 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 351 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*351);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 297 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq11,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq11,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq12,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq12,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq13,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq13,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq21,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq21,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq22,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq22,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq23,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq23,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq31,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq31,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq32,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq32,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- }
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- if (gmx_fjsp_any_lt_v2r8(rsq33,rcutoff2))
- {
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- cutoff_mask = _fjsp_cmplt_v2r8(rsq33,rcutoff2);
-
- fscal = felec;
-
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- }
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 297 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*297);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 70 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 70 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*70);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 57 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 57 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*57);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 143 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 143 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*143);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*120);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 350 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 350 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*350);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 297 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 297 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*297);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 167 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 167 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*167);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 144 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 144 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*144);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 377 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_00,VV);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 377 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*377);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: CubicSplineTable
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 324 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- r00 = _fjsp_mul_v2r8(rsq00,rinv00);
-
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r00,vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
-
- /* CUBIC SPLINE TABLE DISPERSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_00,FF);
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- F = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- H = _fjsp_setzero_v2r8();
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_00,FF);
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv00)));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 324 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*324);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 47 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 47 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 9 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*47);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 37 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 37 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*7 + inneriter*37);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 120 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*120);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 100 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 100 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*18 + inneriter*100);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 327 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 327 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 20 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*327);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- vdwjidx0A = 2*vdwtype[inr+0];
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 277 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = _fjsp_add_v2r8(felec,fvdw);
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 277 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*18 + inneriter*277);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 143 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 143 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*143);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
- vdwjidx0B = 2*vdwtype[jnrB+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 123 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
- vdwjidx0A = 2*vdwtype[jnrA+0];
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
- vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 123 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*24 + inneriter*123);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
- vvdwsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 353 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- vvdw6 = _fjsp_mul_v2r8(c6_00,rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq00);
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 353 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 26 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*353);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: LennardJones
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
- vdwioffset0 = 2*nvdwtype*vdwtype[inr+0];
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- vdwjidx0A = 2*vdwtype[inr+0];
- c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]);
- c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 303 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq00 = gmx_fjsp_inv_v2r8(rsq00);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_00,rinvsix,c6_00),_fjsp_mul_v2r8(rinvsix,rinvsq00));
-
- fscal = fvdw;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 303 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 24 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*24 + inneriter*303);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 35 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 35 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 8 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*35);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Particle-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
-
- /* Load parameters for i particles */
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+0));
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 30 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx00,dy00,dz00);
-
- /* Inner loop uses 30 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 7 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*7 + inneriter*30);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 108 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 108 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*108);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water3-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 93 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 93 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*18 + inneriter*93);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 315 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq00,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq00,rinv00),crf));
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq01,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq01,rinv01),crf));
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq02,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq02,rinv02),crf));
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 315 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*315);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water3-Water3
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset0;
- _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00;
- _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01;
- _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0]));
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
-
- jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]);
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- qq00 = _fjsp_mul_v2r8(iq0,jq0);
- qq01 = _fjsp_mul_v2r8(iq0,jq1);
- qq02 = _fjsp_mul_v2r8(iq0,jq2);
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
-
- fix0 = _fjsp_setzero_v2r8();
- fiy0 = _fjsp_setzero_v2r8();
- fiz0 = _fjsp_setzero_v2r8();
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 270 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
-
- /* Calculate displacement vector */
- dx00 = _fjsp_sub_v2r8(ix0,jx0);
- dy00 = _fjsp_sub_v2r8(iy0,jy0);
- dz00 = _fjsp_sub_v2r8(iz0,jz0);
- dx01 = _fjsp_sub_v2r8(ix0,jx1);
- dy01 = _fjsp_sub_v2r8(iy0,jy1);
- dz01 = _fjsp_sub_v2r8(iz0,jz1);
- dx02 = _fjsp_sub_v2r8(ix0,jx2);
- dy02 = _fjsp_sub_v2r8(iy0,jy2);
- dz02 = _fjsp_sub_v2r8(iz0,jz2);
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
-
- /* Calculate squared distance and things based on it */
- rsq00 = gmx_fjsp_calc_rsq_v2r8(dx00,dy00,dz00);
- rsq01 = gmx_fjsp_calc_rsq_v2r8(dx01,dy01,dz01);
- rsq02 = gmx_fjsp_calc_rsq_v2r8(dx02,dy02,dz02);
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
-
- rinv00 = gmx_fjsp_invsqrt_v2r8(rsq00);
- rinv01 = gmx_fjsp_invsqrt_v2r8(rsq01);
- rinv02 = gmx_fjsp_invsqrt_v2r8(rsq02);
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
-
- rinvsq00 = _fjsp_mul_v2r8(rinv00,rinv00);
- rinvsq01 = _fjsp_mul_v2r8(rinv01,rinv01);
- rinvsq02 = _fjsp_mul_v2r8(rinv02,rinv02);
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq00,_fjsp_msub_v2r8(rinv00,rinvsq00,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx00,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy00,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz00,fscal,fiz0);
-
- fjx0 = _fjsp_madd_v2r8(dx00,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy00,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz00,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq01,_fjsp_msub_v2r8(rinv01,rinvsq01,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx01,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy01,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz01,fscal,fiz0);
-
- fjx1 = _fjsp_madd_v2r8(dx01,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy01,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz01,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq02,_fjsp_msub_v2r8(rinv02,rinvsq02,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix0 = _fjsp_madd_v2r8(dx02,fscal,fix0);
- fiy0 = _fjsp_madd_v2r8(dy02,fscal,fiy0);
- fiz0 = _fjsp_madd_v2r8(dz02,fscal,fiz0);
-
- fjx2 = _fjsp_madd_v2r8(dx02,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy02,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz02,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
-
- /* Inner loop uses 270 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*18 + inneriter*270);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 108 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq10,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq10,rinv10),crf));
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq20,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq20,rinv20),crf));
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq30,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq30,rinv30),crf));
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 108 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*108);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water4-Particle
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx0A,vdwjidx0B;
- _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
- _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10;
- _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20;
- _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+0,charge+jnrB+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 93 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
-
- /* Calculate displacement vector */
- dx10 = _fjsp_sub_v2r8(ix1,jx0);
- dy10 = _fjsp_sub_v2r8(iy1,jy0);
- dz10 = _fjsp_sub_v2r8(iz1,jz0);
- dx20 = _fjsp_sub_v2r8(ix2,jx0);
- dy20 = _fjsp_sub_v2r8(iy2,jy0);
- dz20 = _fjsp_sub_v2r8(iz2,jz0);
- dx30 = _fjsp_sub_v2r8(ix3,jx0);
- dy30 = _fjsp_sub_v2r8(iy3,jy0);
- dz30 = _fjsp_sub_v2r8(iz3,jz0);
-
- /* Calculate squared distance and things based on it */
- rsq10 = gmx_fjsp_calc_rsq_v2r8(dx10,dy10,dz10);
- rsq20 = gmx_fjsp_calc_rsq_v2r8(dx20,dy20,dz20);
- rsq30 = gmx_fjsp_calc_rsq_v2r8(dx30,dy30,dz30);
-
- rinv10 = gmx_fjsp_invsqrt_v2r8(rsq10);
- rinv20 = gmx_fjsp_invsqrt_v2r8(rsq20);
- rinv30 = gmx_fjsp_invsqrt_v2r8(rsq30);
-
- rinvsq10 = _fjsp_mul_v2r8(rinv10,rinv10);
- rinvsq20 = _fjsp_mul_v2r8(rinv20,rinv20);
- rinvsq30 = _fjsp_mul_v2r8(rinv30,rinv30);
-
- /* Load parameters for j particles */
- jq0 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+0);
-
- fjx0 = _fjsp_setzero_v2r8();
- fjy0 = _fjsp_setzero_v2r8();
- fjz0 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq10 = _fjsp_mul_v2r8(iq1,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq10,_fjsp_msub_v2r8(rinv10,rinvsq10,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx10,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy10,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz10,fscal,fiz1);
-
- fjx0 = _fjsp_madd_v2r8(dx10,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy10,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz10,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq20 = _fjsp_mul_v2r8(iq2,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq20,_fjsp_msub_v2r8(rinv20,rinvsq20,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx20,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy20,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz20,fscal,fiz2);
-
- fjx0 = _fjsp_madd_v2r8(dx20,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy20,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz20,fscal,fjz0);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* Compute parameters for interactions between i and j atoms */
- qq30 = _fjsp_mul_v2r8(iq3,jq0);
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq30,_fjsp_msub_v2r8(rinv30,rinvsq30,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx30,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy30,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz30,fscal,fiz3);
-
- fjx0 = _fjsp_madd_v2r8(dx30,fscal,fjx0);
- fjy0 = _fjsp_madd_v2r8(dy30,fscal,fjy0);
- fjz0 = _fjsp_madd_v2r8(dz30,fscal,fjz0);
-
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
-
- /* Inner loop uses 93 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*18 + inneriter*93);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: PotentialAndForce
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Reset potential sums */
- velecsum = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 315 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq11,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq11,rinv11),crf));
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq12,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq12,rinv12),crf));
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq13,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq13,rinv13),crf));
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq21,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq21,rinv21),crf));
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq22,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq22,rinv22),crf));
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq23,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq23,rinv23),crf));
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq31,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq31,rinv31),crf));
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq32,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq32,rinv32),crf));
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq33,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq33,rinv33),crf));
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- /* Update potential sum for this i atom from the interaction with this j atom. */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- velecsum = _fjsp_add_v2r8(velecsum,velec);
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 315 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- ggid = gid[iidx];
- /* Update potential energies */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 19 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*315);
-}
-/*
- * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- * Electrostatics interaction: ReactionField
- * VdW interaction: None
- * Geometry: Water4-Water4
- * Calculate force/pot: Force
- */
-void
-nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- int vdwioffset1;
- _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
- int vdwioffset2;
- _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
- int vdwioffset3;
- _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
- int vdwjidx1A,vdwjidx1B;
- _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
- int vdwjidx2A,vdwjidx2B;
- _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
- int vdwjidx3A,vdwjidx3B;
- _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
- _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11;
- _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12;
- _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13;
- _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21;
- _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22;
- _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23;
- _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31;
- _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32;
- _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33;
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
-
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1]));
- iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2]));
- iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3]));
-
- jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]);
- jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]);
- jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]);
- qq11 = _fjsp_mul_v2r8(iq1,jq1);
- qq12 = _fjsp_mul_v2r8(iq1,jq2);
- qq13 = _fjsp_mul_v2r8(iq1,jq3);
- qq21 = _fjsp_mul_v2r8(iq2,jq1);
- qq22 = _fjsp_mul_v2r8(iq2,jq2);
- qq23 = _fjsp_mul_v2r8(iq2,jq3);
- qq31 = _fjsp_mul_v2r8(iq3,jq1);
- qq32 = _fjsp_mul_v2r8(iq3,jq2);
- qq33 = _fjsp_mul_v2r8(iq3,jq3);
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
-
- fix1 = _fjsp_setzero_v2r8();
- fiy1 = _fjsp_setzero_v2r8();
- fiz1 = _fjsp_setzero_v2r8();
- fix2 = _fjsp_setzero_v2r8();
- fiy2 = _fjsp_setzero_v2r8();
- fiz2 = _fjsp_setzero_v2r8();
- fix3 = _fjsp_setzero_v2r8();
- fiy3 = _fjsp_setzero_v2r8();
- fiz3 = _fjsp_setzero_v2r8();
-
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
-
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- fscal = felec;
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 270 flops */
- }
-
- if(jidx<j_index_end)
- {
-
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
-
- /* Calculate displacement vector */
- dx11 = _fjsp_sub_v2r8(ix1,jx1);
- dy11 = _fjsp_sub_v2r8(iy1,jy1);
- dz11 = _fjsp_sub_v2r8(iz1,jz1);
- dx12 = _fjsp_sub_v2r8(ix1,jx2);
- dy12 = _fjsp_sub_v2r8(iy1,jy2);
- dz12 = _fjsp_sub_v2r8(iz1,jz2);
- dx13 = _fjsp_sub_v2r8(ix1,jx3);
- dy13 = _fjsp_sub_v2r8(iy1,jy3);
- dz13 = _fjsp_sub_v2r8(iz1,jz3);
- dx21 = _fjsp_sub_v2r8(ix2,jx1);
- dy21 = _fjsp_sub_v2r8(iy2,jy1);
- dz21 = _fjsp_sub_v2r8(iz2,jz1);
- dx22 = _fjsp_sub_v2r8(ix2,jx2);
- dy22 = _fjsp_sub_v2r8(iy2,jy2);
- dz22 = _fjsp_sub_v2r8(iz2,jz2);
- dx23 = _fjsp_sub_v2r8(ix2,jx3);
- dy23 = _fjsp_sub_v2r8(iy2,jy3);
- dz23 = _fjsp_sub_v2r8(iz2,jz3);
- dx31 = _fjsp_sub_v2r8(ix3,jx1);
- dy31 = _fjsp_sub_v2r8(iy3,jy1);
- dz31 = _fjsp_sub_v2r8(iz3,jz1);
- dx32 = _fjsp_sub_v2r8(ix3,jx2);
- dy32 = _fjsp_sub_v2r8(iy3,jy2);
- dz32 = _fjsp_sub_v2r8(iz3,jz2);
- dx33 = _fjsp_sub_v2r8(ix3,jx3);
- dy33 = _fjsp_sub_v2r8(iy3,jy3);
- dz33 = _fjsp_sub_v2r8(iz3,jz3);
-
- /* Calculate squared distance and things based on it */
- rsq11 = gmx_fjsp_calc_rsq_v2r8(dx11,dy11,dz11);
- rsq12 = gmx_fjsp_calc_rsq_v2r8(dx12,dy12,dz12);
- rsq13 = gmx_fjsp_calc_rsq_v2r8(dx13,dy13,dz13);
- rsq21 = gmx_fjsp_calc_rsq_v2r8(dx21,dy21,dz21);
- rsq22 = gmx_fjsp_calc_rsq_v2r8(dx22,dy22,dz22);
- rsq23 = gmx_fjsp_calc_rsq_v2r8(dx23,dy23,dz23);
- rsq31 = gmx_fjsp_calc_rsq_v2r8(dx31,dy31,dz31);
- rsq32 = gmx_fjsp_calc_rsq_v2r8(dx32,dy32,dz32);
- rsq33 = gmx_fjsp_calc_rsq_v2r8(dx33,dy33,dz33);
-
- rinv11 = gmx_fjsp_invsqrt_v2r8(rsq11);
- rinv12 = gmx_fjsp_invsqrt_v2r8(rsq12);
- rinv13 = gmx_fjsp_invsqrt_v2r8(rsq13);
- rinv21 = gmx_fjsp_invsqrt_v2r8(rsq21);
- rinv22 = gmx_fjsp_invsqrt_v2r8(rsq22);
- rinv23 = gmx_fjsp_invsqrt_v2r8(rsq23);
- rinv31 = gmx_fjsp_invsqrt_v2r8(rsq31);
- rinv32 = gmx_fjsp_invsqrt_v2r8(rsq32);
- rinv33 = gmx_fjsp_invsqrt_v2r8(rsq33);
-
- rinvsq11 = _fjsp_mul_v2r8(rinv11,rinv11);
- rinvsq12 = _fjsp_mul_v2r8(rinv12,rinv12);
- rinvsq13 = _fjsp_mul_v2r8(rinv13,rinv13);
- rinvsq21 = _fjsp_mul_v2r8(rinv21,rinv21);
- rinvsq22 = _fjsp_mul_v2r8(rinv22,rinv22);
- rinvsq23 = _fjsp_mul_v2r8(rinv23,rinv23);
- rinvsq31 = _fjsp_mul_v2r8(rinv31,rinv31);
- rinvsq32 = _fjsp_mul_v2r8(rinv32,rinv32);
- rinvsq33 = _fjsp_mul_v2r8(rinv33,rinv33);
-
- fjx1 = _fjsp_setzero_v2r8();
- fjy1 = _fjsp_setzero_v2r8();
- fjz1 = _fjsp_setzero_v2r8();
- fjx2 = _fjsp_setzero_v2r8();
- fjy2 = _fjsp_setzero_v2r8();
- fjz2 = _fjsp_setzero_v2r8();
- fjx3 = _fjsp_setzero_v2r8();
- fjy3 = _fjsp_setzero_v2r8();
- fjz3 = _fjsp_setzero_v2r8();
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq11,_fjsp_msub_v2r8(rinv11,rinvsq11,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx11,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy11,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz11,fscal,fiz1);
-
- fjx1 = _fjsp_madd_v2r8(dx11,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy11,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz11,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq12,_fjsp_msub_v2r8(rinv12,rinvsq12,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx12,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy12,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz12,fscal,fiz1);
-
- fjx2 = _fjsp_madd_v2r8(dx12,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy12,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz12,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq13,_fjsp_msub_v2r8(rinv13,rinvsq13,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix1 = _fjsp_madd_v2r8(dx13,fscal,fix1);
- fiy1 = _fjsp_madd_v2r8(dy13,fscal,fiy1);
- fiz1 = _fjsp_madd_v2r8(dz13,fscal,fiz1);
-
- fjx3 = _fjsp_madd_v2r8(dx13,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy13,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz13,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq21,_fjsp_msub_v2r8(rinv21,rinvsq21,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx21,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy21,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz21,fscal,fiz2);
-
- fjx1 = _fjsp_madd_v2r8(dx21,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy21,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz21,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq22,_fjsp_msub_v2r8(rinv22,rinvsq22,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx22,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy22,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz22,fscal,fiz2);
-
- fjx2 = _fjsp_madd_v2r8(dx22,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy22,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz22,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq23,_fjsp_msub_v2r8(rinv23,rinvsq23,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix2 = _fjsp_madd_v2r8(dx23,fscal,fix2);
- fiy2 = _fjsp_madd_v2r8(dy23,fscal,fiy2);
- fiz2 = _fjsp_madd_v2r8(dz23,fscal,fiz2);
-
- fjx3 = _fjsp_madd_v2r8(dx23,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy23,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz23,fscal,fjz3);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq31,_fjsp_msub_v2r8(rinv31,rinvsq31,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx31,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy31,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz31,fscal,fiz3);
-
- fjx1 = _fjsp_madd_v2r8(dx31,fscal,fjx1);
- fjy1 = _fjsp_madd_v2r8(dy31,fscal,fjy1);
- fjz1 = _fjsp_madd_v2r8(dz31,fscal,fjz1);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq32,_fjsp_msub_v2r8(rinv32,rinvsq32,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx32,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy32,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz32,fscal,fiz3);
-
- fjx2 = _fjsp_madd_v2r8(dx32,fscal,fjx2);
- fjy2 = _fjsp_madd_v2r8(dy32,fscal,fjy2);
- fjz2 = _fjsp_madd_v2r8(dz32,fscal,fjz2);
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* REACTION-FIELD ELECTROSTATICS */
- felec = _fjsp_mul_v2r8(qq33,_fjsp_msub_v2r8(rinv33,rinvsq33,krf2));
-
- fscal = felec;
-
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
-
- /* Update vectorial force */
- fix3 = _fjsp_madd_v2r8(dx33,fscal,fix3);
- fiy3 = _fjsp_madd_v2r8(dy33,fscal,fiy3);
- fiz3 = _fjsp_madd_v2r8(dz33,fscal,fiz3);
-
- fjx3 = _fjsp_madd_v2r8(dx33,fscal,fjx3);
- fjy3 = _fjsp_madd_v2r8(dy33,fscal,fjy3);
- fjz3 = _fjsp_madd_v2r8(dz33,fscal,fjz3);
-
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
-
- /* Inner loop uses 270 flops */
- }
-
- /* End of innermost loop */
-
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses 18 flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
-
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*18 + inneriter*270);
-}
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*
- * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
- */
-#include "gmxpre.h"
-
-#include "gromacs/gmxlib/nonbonded/nb_kernel.h"
-
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double;
-extern nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double;
-
-
-nb_kernel_info_t
- kernellist_sparc64_hpc_ace_double[] =
-{
- { nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LJEwald", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LJEwald", "PotentialShift", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LJEwald", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water3Particle", "", "Force" },
- { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water3Water3", "", "Force" },
- { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water4Particle", "", "Force" },
- { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LJEwald", "PotentialShift", "Water4Water4", "", "Force" },
- { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
- { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
- { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
- { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
- { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
- { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
- { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
- { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
- { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
- { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
- { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
- { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
- { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
- { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
- { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
- { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
- { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
- { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
- { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double", "sparc64_hpc_ace_double", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" }
-};
-
-int
- kernellist_sparc64_hpc_ace_double_size = sizeof(kernellist_sparc64_hpc_ace_double)/sizeof(kernellist_sparc64_hpc_ace_double[0]);
+++ /dev/null
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef nb_kernel_sparc64_hpc_ace_double_h
-#define nb_kernel_sparc64_hpc_ace_double_h
-
-#include "gromacs/gmxlib/nonbonded/nb_kernel.h"
-
-/* List of kernels for this architecture with metadata about them */
-extern nb_kernel_info_t
- kernellist_sparc64_hpc_ace_double[];
-
-/* Length of kernellist_c */
-extern int
- kernellist_sparc64_hpc_ace_double_size;
-
-#endif
+++ /dev/null
-/* #if 0 */
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#error This file must be processed with the Gromacs pre-preprocessor
-/* #endif */
-/* #if INCLUDE_HEADER */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "../nb_kernel.h"
-#include "gromacs/gmxlib/nrnb.h"
-
-#include "kernelutil_sparc64_hpc_ace_double.h"
-/* #endif */
-
-/* ## List of variables set by the generating script: */
-/* ## */
-/* ## Setttings that apply to the entire kernel: */
-/* ## KERNEL_ELEC: String, choice for electrostatic interactions */
-/* ## KERNEL_VDW: String, choice for van der Waals interactions */
-/* ## KERNEL_NAME: String, name of this kernel */
-/* ## KERNEL_VF: String telling if we calculate potential, force, or both */
-/* ## GEOMETRY_I/GEOMETRY_J: String, name of each geometry, e.g. 'Water3' or '1Particle' */
-/* ## */
-/* ## Setttings that apply to particles in the outer (I) or inner (J) loops: */
-/* ## PARTICLES_I[]/ Arrays with lists of i/j particles to use in kernel. It is */
-/* ## PARTICLES_J[]: just [0] for particle geometry, but can be longer for water */
-/* ## PARTICLES_ELEC_I[]/ Arrays with lists of i/j particle that have electrostatics */
-/* ## PARTICLES_ELEC_J[]: interactions that should be calculated in this kernel. */
-/* ## PARTICLES_VDW_I[]/ Arrays with the list of i/j particle that have VdW */
-/* ## PARTICLES_VDW_J[]: interactions that should be calculated in this kernel. */
-/* ## */
-/* ## Setttings for pairs of interactions (e.g. 2nd i particle against 1st j particle) */
-/* ## PAIRS_IJ[]: Array with (i,j) tuples of pairs for which interactions */
-/* ## should be calculated in this kernel. Zero-charge particles */
-/* ## do not have interactions with particles without vdw, and */
-/* ## Vdw-only interactions are not evaluated in a no-vdw-kernel. */
-/* ## INTERACTION_FLAGS[][]: 2D matrix, dimension e.g. 3*3 for water-water interactions. */
-/* ## For each i-j pair, the element [I][J] is a list of strings */
-/* ## defining properties/flags of this interaction. Examples */
-/* ## include 'electrostatics'/'vdw' if that type of interaction */
-/* ## should be evaluated, 'rsq'/'rinv'/'rinvsq' if those values */
-/* ## are needed, and 'exactcutoff' or 'shift','switch' to */
-/* ## decide if the force/potential should be modified. This way */
-/* ## we only calculate values absolutely needed for each case. */
-
-/* ## Calculate the size and offset for (merged/interleaved) table data */
-
-/*
- * Gromacs nonbonded kernel: {KERNEL_NAME}
- * Electrostatics interaction: {KERNEL_ELEC}
- * VdW interaction: {KERNEL_VDW}
- * Geometry: {GEOMETRY_I}-{GEOMETRY_J}
- * Calculate force/pot: {KERNEL_VF}
- */
-void
-{KERNEL_NAME}
- (t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- struct t_forcerec * gmx_restrict fr,
- t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
-{
- /* ## Not all variables are used for all kernels, but any optimizing compiler fixes that, */
- /* ## so there is no point in going to extremes to exclude variables that are not needed. */
- /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
- * just 0 for non-waters.
- * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
- * jnr indices corresponding to data put in the four positions in the SIMD register.
- */
- int i_shift_offset,i_coord_offset,outeriter,inneriter;
- int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx;
- int jnrA,jnrB;
- int j_coord_offsetA,j_coord_offsetB;
- int *iinr,*jindex,*jjnr,*shiftidx,*gid;
- real rcutoff_scalar;
- real *shiftvec,*fshift,*x,*f;
- _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall;
- /* #for I in PARTICLES_I */
- int vdwioffset{I};
- _fjsp_v2r8 ix{I},iy{I},iz{I},fix{I},fiy{I},fiz{I},iq{I},isai{I};
- /* #endfor */
- /* #for J in PARTICLES_J */
- int vdwjidx{J}A,vdwjidx{J}B;
- _fjsp_v2r8 jx{J},jy{J},jz{J},fjx{J},fjy{J},fjz{J},jq{J},isaj{J};
- /* #endfor */
- /* #for I,J in PAIRS_IJ */
- _fjsp_v2r8 dx{I}{J},dy{I}{J},dz{I}{J},rsq{I}{J},rinv{I}{J},rinvsq{I}{J},r{I}{J},qq{I}{J},c6_{I}{J},c12_{I}{J};
- /* #endfor */
- /* #if KERNEL_ELEC != 'None' */
- _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2;
- real *charge;
- /* #endif */
- /* #if KERNEL_VDW != 'None' */
- int nvdwtype;
- _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6;
- int *vdwtype;
- real *vdwparam;
- _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0);
- _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0);
- /* #endif */
- /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
- _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps;
- real *vftab;
- /* #endif */
- /* #if 'LJEwald' in KERNEL_VDW */
- /* #for I,J in PAIRS_IJ */
- _fjsp_v2r8 c6grid_{I}{J};
- /* #endfor */
- real *vdwgridparam;
- _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald;
- _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5);
- _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0);
- /* #endif */
- /* #if 'Ewald' in KERNEL_ELEC */
- _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV;
- real *ewtab;
- /* #endif */
- /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */
- _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
- real rswitch_scalar,d_scalar;
- /* #endif */
- _fjsp_v2r8 itab_tmp;
- _fjsp_v2r8 dummy_mask,cutoff_mask;
- _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0);
- _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0);
- union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv;
-
- x = xx[0];
- f = ff[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- shiftidx = nlist->shift;
- gid = nlist->gid;
- shiftvec = fr->shift_vec[0];
- fshift = fr->fshift[0];
- /* #if KERNEL_ELEC != 'None' */
- facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac);
- charge = mdatoms->chargeA;
- /* #if 'ReactionField' in KERNEL_ELEC */
- krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf);
- krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0);
- crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf);
- /* #endif */
- /* #endif */
- /* #if KERNEL_VDW != 'None' */
- nvdwtype = fr->ntype;
- vdwparam = fr->nbfp;
- vdwtype = mdatoms->typeA;
- /* #endif */
- /* #if 'LJEwald' in KERNEL_VDW */
- vdwgridparam = fr->ljpme_c6grid;
- sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald);
- ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj);
- ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj));
- /* #endif */
-
- /* #if 'Table' in KERNEL_ELEC and 'Table' in KERNEL_VDW */
- vftab = kernel_data->table_elec_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale);
- /* #elif 'Table' in KERNEL_ELEC */
- vftab = kernel_data->table_elec->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale);
- /* #elif 'Table' in KERNEL_VDW */
- vftab = kernel_data->table_vdw->data;
- vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale);
- /* #endif */
-
- /* #if 'Ewald' in KERNEL_ELEC */
- sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald);
- /* #if KERNEL_VF=='Force' and KERNEL_MOD_ELEC!='PotentialSwitch' */
- ewtab = fr->ic->tabq_coul_F;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
- /* #else */
- ewtab = fr->ic->tabq_coul_FDV0;
- ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale);
- ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale);
- /* #endif */
- /* #endif */
-
- /* #if 'Water' in GEOMETRY_I */
- /* Setup water-specific parameters */
- inr = nlist->iinr[0];
- /* #for I in PARTICLES_ELEC_I */
- iq{I} = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+{I}]));
- /* #endfor */
- /* #for I in PARTICLES_VDW_I */
- vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}];
- /* #endfor */
- /* #endif */
-
- /* #if 'Water' in GEOMETRY_J */
- /* #for J in PARTICLES_ELEC_J */
- jq{J} = gmx_fjsp_set1_v2r8(charge[inr+{J}]);
- /* #endfor */
- /* #for J in PARTICLES_VDW_J */
- vdwjidx{J}A = 2*vdwtype[inr+{J}];
- /* #endfor */
- /* #for I,J in PAIRS_IJ */
- /* #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
- qq{I}{J} = _fjsp_mul_v2r8(iq{I},jq{J});
- /* #endif */
- /* #if 'vdw' in INTERACTION_FLAGS[I][J] */
- /* #if 'LJEwald' in KERNEL_VDW */
- c6_{I}{J} = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A]);
- c12_{I}{J} = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A+1]);
- c6grid_{I}{J} = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset{I}+vdwjidx{J}A]);
- /* #else */
- c6_{I}{J} = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A]);
- c12_{I}{J} = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A+1]);
- /* #endif */
- /* #endif */
- /* #endfor */
- /* #endif */
-
- /* #if KERNEL_MOD_ELEC!='None' or KERNEL_MOD_VDW!='None' */
- /* #if KERNEL_ELEC!='None' */
- /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
- rcutoff_scalar = fr->ic->rcoulomb;
- /* #else */
- rcutoff_scalar = fr->ic->rvdw;
- /* #endif */
- rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar);
- rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff);
- /* #endif */
-
- /* #if KERNEL_MOD_VDW=='PotentialShift' */
- sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6);
- rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw);
- /* #endif */
-
- /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */
- /* #if KERNEL_MOD_ELEC=='PotentialSwitch' */
- rswitch_scalar = fr->ic->rcoulomb_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* #else */
- rswitch_scalar = fr->ic->rvdw_switch;
- rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar);
- /* #endif */
- /* Setup switch parameters */
- d_scalar = rcutoff_scalar-rswitch_scalar;
- d = gmx_fjsp_set1_v2r8(d_scalar);
- swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar));
- swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- /* #if 'Force' in KERNEL_VF */
- swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar));
- swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar));
- swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar));
- /* #endif */
- /* #endif */
-
- /* Avoid stupid compiler warnings */
- jnrA = jnrB = 0;
- j_coord_offsetA = 0;
- j_coord_offsetB = 0;
-
- /* ## Keep track of the floating point operations we issue for reporting! */
- /* #define OUTERFLOPS 0 */
- outeriter = 0;
- inneriter = 0;
-
- /* Start outer loop over neighborlists */
- for(iidx=0; iidx<nri; iidx++)
- {
- /* Load shift vector for this list */
- i_shift_offset = DIM*shiftidx[iidx];
-
- /* Load limits for loop over neighbors */
- j_index_start = jindex[iidx];
- j_index_end = jindex[iidx+1];
-
- /* Get outer coordinate index */
- inr = iinr[iidx];
- i_coord_offset = DIM*inr;
-
- /* Load i particle coords and add shift vector */
- /* #if GEOMETRY_I == 'Particle' */
- gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,&ix0,&iy0,&iz0);
- /* #elif GEOMETRY_I == 'Water3' */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2);
- /* #elif GEOMETRY_I == 'Water4' */
- /* #if 0 in PARTICLES_I */
- gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset,
- &ix0,&iy0,&iz0,&ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
- /* #else */
- gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec+i_shift_offset,x+i_coord_offset+DIM,
- &ix1,&iy1,&iz1,&ix2,&iy2,&iz2,&ix3,&iy3,&iz3);
- /* #endif */
- /* #endif */
-
- /* #if 'Force' in KERNEL_VF */
- /* #for I in PARTICLES_I */
- fix{I} = _fjsp_setzero_v2r8();
- fiy{I} = _fjsp_setzero_v2r8();
- fiz{I} = _fjsp_setzero_v2r8();
- /* #endfor */
- /* #endif */
-
- /* ## For water we already preloaded parameters at the start of the kernel */
- /* #if not 'Water' in GEOMETRY_I */
- /* Load parameters for i particles */
- /* #for I in PARTICLES_ELEC_I */
- iq{I} = _fjsp_mul_v2r8(facel,gmx_fjsp_load1_v2r8(charge+inr+{I}));
- /* #define OUTERFLOPS OUTERFLOPS+1 */
- /* #endfor */
- /* #for I in PARTICLES_VDW_I */
- vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}];
- /* #endfor */
- /* #endif */
-
- /* #if 'Potential' in KERNEL_VF */
- /* Reset potential sums */
- /* #if KERNEL_ELEC != 'None' */
- velecsum = _fjsp_setzero_v2r8();
- /* #endif */
- /* #if KERNEL_VDW != 'None' */
- vvdwsum = _fjsp_setzero_v2r8();
- /* #endif */
- /* #endif */
-
- /* #for ROUND in ['Loop','Epilogue'] */
-
- /* #if ROUND =='Loop' */
- /* Start inner kernel loop */
- for(jidx=j_index_start; jidx<j_index_end-1; jidx+=2)
- {
- /* ## First round is normal loop (next statement resets indentation) */
- /* #if 0 */
- }
- /* #endif */
- /* #else */
- if(jidx<j_index_end)
- {
- /* ## Second round is epilogue */
- /* #endif */
- /* #define INNERFLOPS 0 */
-
- /* #if ROUND =='Loop' */
- /* Get j neighbor index, and coordinate index */
- jnrA = jjnr[jidx];
- jnrB = jjnr[jidx+1];
- j_coord_offsetA = DIM*jnrA;
- j_coord_offsetB = DIM*jnrB;
-
- /* load j atom coordinates */
- /* #if GEOMETRY_J == 'Particle' */
- gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0);
- /* #elif GEOMETRY_J == 'Water3' */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
- /* #elif GEOMETRY_J == 'Water4' */
- /* #if 0 in PARTICLES_J */
- gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA,x+j_coord_offsetB,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
- /* #else */
- gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,x+j_coord_offsetB+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
- /* #endif */
- /* #endif */
- /* #else */
- jnrA = jjnr[jidx];
- j_coord_offsetA = DIM*jnrA;
-
- /* load j atom coordinates */
- /* #if GEOMETRY_J == 'Particle' */
- gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0);
- /* #elif GEOMETRY_J == 'Water3' */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,&jy2,&jz2);
- /* #elif GEOMETRY_J == 'Water4' */
- /* #if 0 in PARTICLES_J */
- gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA,
- &jx0,&jy0,&jz0,&jx1,&jy1,&jz1,&jx2,
- &jy2,&jz2,&jx3,&jy3,&jz3);
- /* #else */
- gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x+j_coord_offsetA+DIM,
- &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);
- /* #endif */
- /* #endif */
- /* #endif */
-
- /* Calculate displacement vector */
- /* #for I,J in PAIRS_IJ */
- dx{I}{J} = _fjsp_sub_v2r8(ix{I},jx{J});
- dy{I}{J} = _fjsp_sub_v2r8(iy{I},jy{J});
- dz{I}{J} = _fjsp_sub_v2r8(iz{I},jz{J});
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #endfor */
-
- /* Calculate squared distance and things based on it */
- /* #for I,J in PAIRS_IJ */
- rsq{I}{J} = gmx_fjsp_calc_rsq_v2r8(dx{I}{J},dy{I}{J},dz{I}{J});
- /* #define INNERFLOPS INNERFLOPS+5 */
- /* #endfor */
-
- /* #for I,J in PAIRS_IJ */
- /* #if 'rinv' in INTERACTION_FLAGS[I][J] */
- rinv{I}{J} = gmx_fjsp_invsqrt_v2r8(rsq{I}{J});
- /* #define INNERFLOPS INNERFLOPS+5 */
- /* #endif */
- /* #endfor */
-
- /* #for I,J in PAIRS_IJ */
- /* #if 'rinvsq' in INTERACTION_FLAGS[I][J] */
- /* # if 'rinv' not in INTERACTION_FLAGS[I][J] */
- rinvsq{I}{J} = gmx_fjsp_inv_v2r8(rsq{I}{J});
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #else */
- rinvsq{I}{J} = _fjsp_mul_v2r8(rinv{I}{J},rinv{I}{J});
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
- /* #endif */
- /* #endfor */
-
- /* #if not 'Water' in GEOMETRY_J */
- /* Load parameters for j particles */
- /* #for J in PARTICLES_ELEC_J */
- /* #if ROUND =='Loop' */
- jq{J} = gmx_fjsp_load_2real_swizzle_v2r8(charge+jnrA+{J},charge+jnrB+{J});
- /* #else */
- jq{J} = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge+jnrA+{J});
- /* #endif */
- /* #endfor */
- /* #for J in PARTICLES_VDW_J */
- vdwjidx{J}A = 2*vdwtype[jnrA+{J}];
- /* #if ROUND =='Loop' */
- vdwjidx{J}B = 2*vdwtype[jnrB+{J}];
- /* #endif */
- /* #endfor */
- /* #endif */
-
- /* #if 'Force' in KERNEL_VF and not 'Particle' in GEOMETRY_I */
- /* #for J in PARTICLES_J */
- fjx{J} = _fjsp_setzero_v2r8();
- fjy{J} = _fjsp_setzero_v2r8();
- fjz{J} = _fjsp_setzero_v2r8();
- /* #endfor */
- /* #endif */
-
- /* #for I,J in PAIRS_IJ */
-
- /**************************
- * CALCULATE INTERACTIONS *
- **************************/
-
- /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
- /* ## We always calculate rinv/rinvsq above to enable pipelineing in compilers (performance tested on x86) */
- if (gmx_fjsp_any_lt_v2r8(rsq{I}{J},rcutoff2))
- {
- /* #if 0 ## this and the next two lines is a hack to maintain auto-indentation in template file */
- }
- /* #endif */
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
-
- /* #if 'r' in INTERACTION_FLAGS[I][J] */
- r{I}{J} = _fjsp_mul_v2r8(rsq{I}{J},rinv{I}{J});
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
-
- /* ## For water geometries we already loaded parameters at the start of the kernel */
- /* #if not 'Water' in GEOMETRY_J */
- /* Compute parameters for interactions between i and j atoms */
- /* #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
- qq{I}{J} = _fjsp_mul_v2r8(iq{I},jq{J});
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
- /* #if 'vdw' in INTERACTION_FLAGS[I][J] */
- gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset{I}+vdwjidx{J}A,
- vdwparam+vdwioffset{I}+vdwjidx{J}B,&c6_{I}{J},&c12_{I}{J});
-
- /* #if 'LJEwald' in KERNEL_VDW */
- c6grid_{I}{J} = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset{I}+vdwjidx{J}A,
- vdwgridparam+vdwioffset{I}+vdwjidx{J}B);
- /* #endif */
- /* #endif */
- /* #endif */
-
- /* #if 'table' in INTERACTION_FLAGS[I][J] */
- /* Calculate table index by multiplying r with table scale and truncate to integer */
- rt = _fjsp_mul_v2r8(r{I}{J},vftabscale);
- itab_tmp = _fjsp_dtox_v2r8(rt);
- vfeps = _fjsp_sub_v2r8(rt, _fjsp_xtod_v2r8(itab_tmp));
- twovfeps = _fjsp_add_v2r8(vfeps,vfeps);
- _fjsp_store_v2r8(&vfconv.simd,itab_tmp);
-
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #if 'Table' in KERNEL_ELEC and 'Table' in KERNEL_VDW */
- /* ## 3 tables, 4 data per point: multiply index by 12 */
- vfconv.i[0] *= 12;
- vfconv.i[1] *= 12;
- /* #elif 'Table' in KERNEL_ELEC */
- /* ## 1 table, 4 data per point: multiply index by 4 */
- vfconv.i[0] *= 4;
- vfconv.i[1] *= 4;
- /* #elif 'Table' in KERNEL_VDW */
- /* ## 2 tables, 4 data per point: multiply index by 8 */
- vfconv.i[0] *= 8;
- vfconv.i[1] *= 8;
- /* #endif */
- /* #endif */
-
- /* ## ELECTROSTATIC INTERACTIONS */
- /* #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
-
- /* #if KERNEL_ELEC=='Coulomb' */
-
- /* COULOMB ELECTROSTATICS */
- velec = _fjsp_mul_v2r8(qq{I}{J},rinv{I}{J});
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #if 'Force' in KERNEL_VF */
- felec = _fjsp_mul_v2r8(velec,rinvsq{I}{J});
- /* #define INNERFLOPS INNERFLOPS+2 */
- /* #endif */
-
- /* #elif KERNEL_ELEC=='ReactionField' */
-
- /* REACTION-FIELD ELECTROSTATICS */
- /* #if 'Potential' in KERNEL_VF */
- velec = _fjsp_mul_v2r8(qq{I}{J},_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf,rsq{I}{J},rinv{I}{J}),crf));
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #endif */
- /* #if 'Force' in KERNEL_VF */
- felec = _fjsp_mul_v2r8(qq{I}{J},_fjsp_msub_v2r8(rinv{I}{J},rinvsq{I}{J},krf2));
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #endif */
-
- /* #elif KERNEL_ELEC=='Ewald' */
- /* EWALD ELECTROSTATICS */
-
- /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
- ewrt = _fjsp_mul_v2r8(r{I}{J},ewtabscale);
- itab_tmp = _fjsp_dtox_v2r8(ewrt);
- eweps = _fjsp_sub_v2r8(ewrt,_fjsp_xtod_v2r8(itab_tmp));
- _fjsp_store_v2r8(&ewconv.simd,itab_tmp);
-
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #if 'Potential' in KERNEL_VF or KERNEL_MOD_ELEC=='PotentialSwitch' */
- ewtabF = _fjsp_load_v2r8( ewtab + 4*ewconv.i[0] );
- /* #if ROUND == 'Loop' */
- ewtabD = _fjsp_load_v2r8( ewtab + 4*ewconv.i[1] );
- /* #else */
- ewtabD = _fjsp_setzero_v2r8();
- /* #endif */
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabF,ewtabD);
- ewtabV = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[0] +2);
- /* #if ROUND == 'Loop' */
- ewtabFn = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ewtab + 4*ewconv.i[1] +2);
- /* #else */
- ewtabFn = _fjsp_setzero_v2r8();
- /* #endif */
- GMX_FJSP_TRANSPOSE2_V2R8(ewtabV,ewtabFn);
- felec = _fjsp_madd_v2r8(eweps,ewtabD,ewtabF);
- /* #define INNERFLOPS INNERFLOPS+2 */
- /* #if KERNEL_MOD_ELEC=='PotentialShift' */
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq{I}{J},_fjsp_sub_v2r8(_fjsp_sub_v2r8(rinv{I}{J},sh_ewald),velec));
- /* #define INNERFLOPS INNERFLOPS+7 */
- /* #else */
- velec = _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(ewtabhalfspace,eweps) ,_fjsp_add_v2r8(ewtabF,felec), ewtabV);
- velec = _fjsp_mul_v2r8(qq{I}{J},_fjsp_sub_v2r8(rinv{I}{J},velec));
- /* #define INNERFLOPS INNERFLOPS+6 */
- /* #endif */
- /* #if 'Force' in KERNEL_VF */
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq{I}{J},rinv{I}{J}),_fjsp_sub_v2r8(rinvsq{I}{J},felec));
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #endif */
- /* #elif KERNEL_VF=='Force' */
- /* #if ROUND == 'Loop' */
- gmx_fjsp_load_2pair_swizzle_v2r8(ewtab+ewconv.i[0],ewtab+ewconv.i[1],
- &ewtabF,&ewtabFn);
- /* #else */
- gmx_fjsp_load_1pair_swizzle_v2r8(ewtab+ewconv.i[0],&ewtabF,&ewtabFn);
- /* #endif */
- felec = _fjsp_madd_v2r8(eweps,ewtabFn,_fjsp_nmsub_v2r8(eweps,ewtabF,ewtabF));
- felec = _fjsp_mul_v2r8(_fjsp_mul_v2r8(qq{I}{J},rinv{I}{J}),_fjsp_sub_v2r8(rinvsq{I}{J},felec));
- /* #define INNERFLOPS INNERFLOPS+7 */
- /* #endif */
-
- /* #elif KERNEL_ELEC=='CubicSplineTable' */
-
- /* CUBIC SPLINE TABLE ELECTROSTATICS */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- /* #if ROUND == 'Loop' */
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- /* #else */
- F = _fjsp_setzero_v2r8();
- /* #endif */
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] +2);
- /* #if ROUND == 'Loop' */
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] +2);
- /* #else */
- H = _fjsp_setzero_v2r8();
- /* #endif */
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(vfeps,H,G),F);
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #if 'Potential' in KERNEL_VF */
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- velec = _fjsp_mul_v2r8(qq{I}{J},VV);
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #endif */
- /* #if 'Force' in KERNEL_VF */
- FF = _fjsp_madd_v2r8(_fjsp_madd_v2r8(twovfeps,H,G),vfeps,Fp);
- felec = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_mul_v2r8(qq{I}{J},FF),_fjsp_mul_v2r8(vftabscale,rinv{I}{J})));
- /* #define INNERFLOPS INNERFLOPS+7 */
- /* #endif */
- /* #endif */
- /* ## End of check for electrostatics interaction forms */
- /* #endif */
- /* ## END OF ELECTROSTATIC INTERACTION CHECK FOR PAIR I-J */
-
- /* #if 'vdw' in INTERACTION_FLAGS[I][J] */
-
- /* #if KERNEL_VDW=='LennardJones' */
-
- /* LENNARD-JONES DISPERSION/REPULSION */
-
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq{I}{J},rinvsq{I}{J}),rinvsq{I}{J});
- /* #define INNERFLOPS INNERFLOPS+2 */
- /* #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch' */
- vvdw6 = _fjsp_mul_v2r8(c6_{I}{J},rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_{I}{J},_fjsp_mul_v2r8(rinvsix,rinvsix));
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #if KERNEL_MOD_VDW=='PotentialShift' */
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_{I}{J},_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_nmsub_v2r8( c6_{I}{J},sh_vdw_invrcut6,vvdw6),one_sixth));
- /* #define INNERFLOPS INNERFLOPS+8 */
- /* #else */
- vvdw = _fjsp_msub_v2r8( vvdw12,one_twelfth, _fjsp_mul_v2r8(vvdw6,one_sixth) );
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #endif */
- /* ## Check for force inside potential check, i.e. this means we already did the potential part */
- /* #if 'Force' in KERNEL_VF */
- fvdw = _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12,vvdw6),rinvsq{I}{J});
- /* #define INNERFLOPS INNERFLOPS+2 */
- /* #endif */
- /* #elif KERNEL_VF=='Force' */
- /* ## Force-only LennardJones makes it possible to save 1 flop (they do add up...) */
- fvdw = _fjsp_mul_v2r8(_fjsp_msub_v2r8(c12_{I}{J},rinvsix,c6_{I}{J}),_fjsp_mul_v2r8(rinvsix,rinvsq{I}{J}));
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #endif */
-
- /* #elif KERNEL_VDW=='CubicSplineTable' */
-
- /* CUBIC SPLINE TABLE DISPERSION */
- /* #if 'Table' in KERNEL_ELEC */
- vfconv.i[0] += 4;
- vfconv.i[1] += 4;
- /* #endif */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] );
- /* #if ROUND == 'Loop' */
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] );
- /* #else */
- F = _fjsp_setzero_v2r8();
- /* #endif */
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 2 );
- /* #if ROUND == 'Loop' */
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 2 );
- /* #else */
- H = _fjsp_setzero_v2r8();
- /* #endif */
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #if 'Potential' in KERNEL_VF */
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw6 = _fjsp_mul_v2r8(c6_{I}{J},VV);
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #endif */
- /* #if 'Force' in KERNEL_VF */
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw6 = _fjsp_mul_v2r8(c6_{I}{J},FF);
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #endif */
-
- /* CUBIC SPLINE TABLE REPULSION */
- Y = _fjsp_load_v2r8( vftab + vfconv.i[0] + 4 );
- /* #if ROUND == 'Loop' */
- F = _fjsp_load_v2r8( vftab + vfconv.i[1] + 4 );
- /* #else */
- F = _fjsp_setzero_v2r8();
- /* #endif */
- GMX_FJSP_TRANSPOSE2_V2R8(Y,F);
- G = _fjsp_load_v2r8( vftab + vfconv.i[0] + 6 );
- /* #if ROUND == 'Loop' */
- H = _fjsp_load_v2r8( vftab + vfconv.i[1] + 6 );
- /* #else */
- H = _fjsp_setzero_v2r8();
- /* #endif */
- GMX_FJSP_TRANSPOSE2_V2R8(G,H);
- Fp = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(H,vfeps,G),F);
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #if 'Potential' in KERNEL_VF */
- VV = _fjsp_madd_v2r8(vfeps,Fp,Y);
- vvdw12 = _fjsp_mul_v2r8(c12_{I}{J},VV);
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #endif */
- /* #if 'Force' in KERNEL_VF */
- FF = _fjsp_madd_v2r8(vfeps,_fjsp_madd_v2r8(twovfeps,H,G),Fp);
- fvdw12 = _fjsp_mul_v2r8(c12_{I}{J},FF);
- /* #define INNERFLOPS INNERFLOPS+5 */
- /* #endif */
- /* #if 'Potential' in KERNEL_VF */
- vvdw = _fjsp_add_v2r8(vvdw12,vvdw6);
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
- /* #if 'Force' in KERNEL_VF */
- fvdw = _fjsp_neg_v2r8(_fjsp_mul_v2r8(_fjsp_add_v2r8(fvdw6,fvdw12),_fjsp_mul_v2r8(vftabscale,rinv{I}{J})));
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #endif */
-
- /* #elif KERNEL_VDW=='LJEwald' */
-
- /* Analytical LJ-PME */
- rinvsix = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq{I}{J},rinvsq{I}{J}),rinvsq{I}{J});
- ewcljrsq = _fjsp_mul_v2r8(ewclj2,rsq{I}{J});
- ewclj6 = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
- exponent = gmx_simd_exp_d(ewcljrsq);
- /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2) */
- poly = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
- /* #define INNERFLOPS INNERFLOPS+9 */
- /* #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch' */
- /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
- vvdw6 = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_{I}{J},_fjsp_sub_v2r8(poly,one),c6_{I}{J}),rinvsix);
- vvdw12 = _fjsp_mul_v2r8(c12_{I}{J},_fjsp_mul_v2r8(rinvsix,rinvsix));
- /* #define INNERFLOPS INNERFLOPS+5 */
- /* #if KERNEL_MOD_VDW=='PotentialShift' */
- vvdw = _fjsp_msub_v2r8(_fjsp_nmsub_v2r8(c12_{I}{J},_fjsp_mul_v2r8(sh_vdw_invrcut6,sh_vdw_invrcut6),vvdw12),one_twelfth,
- _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw6,_fjsp_madd_v2r8(c6grid_{I}{J},sh_lj_ewald,_fjsp_mul_v2r8(c6_{I}{J},sh_vdw_invrcut6))),one_sixth));
- /* #define INNERFLOPS INNERFLOPS+7 */
- /* #else */
- vvdw = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));
- /* #define INNERFLOPS INNERFLOPS+2 */
- /* #endif */
- /* ## Check for force inside potential check, i.e. this means we already did the potential part */
- /* #if 'Force' in KERNEL_VF */
- /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
- fvdw = _fjsp_mul_v2r8(_fjsp_add_v2r8(vvdw12,_fjsp_msub_v2r8(_fjsp_mul_v2r8(c6grid_{I}{J},one_sixth),_fjsp_mul_v2r8(exponent,ewclj6),vvdw6)),rinvsq{I}{J});
- /* #define INNERFLOPS INNERFLOPS+6 */
- /* #endif */
- /* #elif KERNEL_VF=='Force' */
- /* f6A = 6 * C6grid * (1 - poly) */
- f6A = _fjsp_mul_v2r8(c6grid_{I}{J},_fjsp_sub_v2r8(one,poly));
- /* f6B = C6grid * exponent * beta^6 */
- f6B = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_{I}{J},one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
- /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
- fvdw = _fjsp_mul_v2r8(_fjsp_madd_v2r8(_fjsp_msub_v2r8(c12_{I}{J},rinvsix,_fjsp_sub_v2r8(c6_{I}{J},f6A)),rinvsix,f6B),rinvsq{I}{J});
- /* #define INNERFLOPS INNERFLOPS+12 */
- /* #endif */
- /* #endif */
- /* ## End of check for vdw interaction forms */
- /* #endif */
- /* ## END OF VDW INTERACTION CHECK FOR PAIR I-J */
-
- /* #if 'switch' in INTERACTION_FLAGS[I][J] */
- d = _fjsp_sub_v2r8(r{I}{J},rswitch);
- d = _fjsp_max_v2r8(d,_fjsp_setzero_v2r8());
- d2 = _fjsp_mul_v2r8(d,d);
- sw = _fjsp_add_v2r8(one,_fjsp_mul_v2r8(d2,_fjsp_mul_v2r8(d,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swV5,swV4),swV3))));
- /* #define INNERFLOPS INNERFLOPS+10 */
-
- /* #if 'Force' in KERNEL_VF */
- dsw = _fjsp_mul_v2r8(d2,_fjsp_madd_v2r8(d,_fjsp_madd_v2r8(d,swF4,swF3),swF2));
- /* #define INNERFLOPS INNERFLOPS+5 */
- /* #endif */
-
- /* Evaluate switch function */
- /* #if 'Force' in KERNEL_VF */
- /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
- /* #if 'electrostatics' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_ELEC=='PotentialSwitch' */
- felec = _fjsp_msub_v2r8( felec,sw , _fjsp_mul_v2r8(rinv{I}{J},_fjsp_mul_v2r8(velec,dsw)) );
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #endif */
- /* #if 'vdw' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_VDW=='PotentialSwitch' */
- fvdw = _fjsp_msub_v2r8( fvdw,sw , _fjsp_mul_v2r8(rinv{I}{J},_fjsp_mul_v2r8(vvdw,dsw)) );
- /* #define INNERFLOPS INNERFLOPS+4 */
- /* #endif */
- /* #endif */
- /* #if 'Potential' in KERNEL_VF */
- /* #if 'electrostatics' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_ELEC=='PotentialSwitch' */
- velec = _fjsp_mul_v2r8(velec,sw);
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
- /* #if 'vdw' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_VDW=='PotentialSwitch' */
- vvdw = _fjsp_mul_v2r8(vvdw,sw);
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
- /* #endif */
- /* #endif */
- /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
- cutoff_mask = _fjsp_cmplt_v2r8(rsq{I}{J},rcutoff2);
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
-
- /* #if 'Potential' in KERNEL_VF */
- /* Update potential sum for this i atom from the interaction with this j atom. */
- /* #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
- /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
- velec = _fjsp_and_v2r8(velec,cutoff_mask);
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
- /* #if ROUND == 'Epilogue' */
- velec = _fjsp_unpacklo_v2r8(velec,_fjsp_setzero_v2r8());
- /* #endif */
- velecsum = _fjsp_add_v2r8(velecsum,velec);
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
- /* #if 'vdw' in INTERACTION_FLAGS[I][J] */
- /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
- vvdw = _fjsp_and_v2r8(vvdw,cutoff_mask);
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
- /* #if ROUND == 'Epilogue' */
- vvdw = _fjsp_unpacklo_v2r8(vvdw,_fjsp_setzero_v2r8());
- /* #endif */
- vvdwsum = _fjsp_add_v2r8(vvdwsum,vvdw);
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
- /* #endif */
-
- /* #if 'Force' in KERNEL_VF */
-
- /* #if 'electrostatics' in INTERACTION_FLAGS[I][J] and 'vdw' in INTERACTION_FLAGS[I][J] */
- fscal = _fjsp_add_v2r8(felec,fvdw);
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #elif 'electrostatics' in INTERACTION_FLAGS[I][J] */
- fscal = felec;
- /* #elif 'vdw' in INTERACTION_FLAGS[I][J] */
- fscal = fvdw;
- /* #endif */
-
- /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
- fscal = _fjsp_and_v2r8(fscal,cutoff_mask);
- /* #define INNERFLOPS INNERFLOPS+1 */
- /* #endif */
-
- /* #if ROUND == 'Epilogue' */
- fscal = _fjsp_unpacklo_v2r8(fscal,_fjsp_setzero_v2r8());
- /* #endif */
-
- /* ## Construction of vectorial force built into FMA instructions now */
- /* #define INNERFLOPS INNERFLOPS+3 */
-
- /* Update vectorial force */
- fix{I} = _fjsp_madd_v2r8(dx{I}{J},fscal,fix{I});
- fiy{I} = _fjsp_madd_v2r8(dy{I}{J},fscal,fiy{I});
- fiz{I} = _fjsp_madd_v2r8(dz{I}{J},fscal,fiz{I});
- /* #define INNERFLOPS INNERFLOPS+6 */
-
- /* #if GEOMETRY_I == 'Particle' */
- /* #if ROUND == 'Loop' */
- gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fscal,dx{I}{J},dy{I}{J},dz{I}{J});
- /* #else */
- gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fscal,dx{I}{J},dy{I}{J},dz{I}{J});
- /* #endif */
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #else */
- fjx{J} = _fjsp_madd_v2r8(dx{I}{J},fscal,fjx{J});
- fjy{J} = _fjsp_madd_v2r8(dy{I}{J},fscal,fjy{J});
- fjz{J} = _fjsp_madd_v2r8(dz{I}{J},fscal,fjz{J});
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #endif */
-
- /* #endif */
-
- /* #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
- /* #if 0 ## This and next two lines is a hack to maintain indentation in template file */
- {
- /* #endif */
- }
- /* #endif */
- /* ## End of check for the interaction being outside the cutoff */
-
- /* #endfor */
- /* ## End of loop over i-j interaction pairs */
-
- /* #if 'Water' in GEOMETRY_I and GEOMETRY_J == 'Particle' */
- /* #if ROUND == 'Loop' */
- gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0);
- /* #else */
- gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0);
- /* #endif */
- /* #define INNERFLOPS INNERFLOPS+3 */
- /* #elif GEOMETRY_J == 'Water3' */
- /* #if ROUND == 'Loop' */
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
- /* #else */
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2);
- /* #endif */
- /* #define INNERFLOPS INNERFLOPS+9 */
- /* #elif GEOMETRY_J == 'Water4' */
- /* #if 0 in PARTICLES_J */
- /* #if ROUND == 'Loop' */
- gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA,f+j_coord_offsetB,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
- /* #else */
- gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA,fjx0,fjy0,fjz0,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
- /* #endif */
- /* #define INNERFLOPS INNERFLOPS+12 */
- /* #else */
- /* #if ROUND == 'Loop' */
- gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,f+j_coord_offsetB+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
- /* #else */
- gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f+j_coord_offsetA+DIM,fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3);
- /* #endif */
- /* #define INNERFLOPS INNERFLOPS+9 */
- /* #endif */
- /* #endif */
-
- /* Inner loop uses {INNERFLOPS} flops */
- }
-
- /* #endfor */
-
- /* End of innermost loop */
-
- /* #if 'Force' in KERNEL_VF */
- /* #if GEOMETRY_I == 'Particle' */
- gmx_fjsp_update_iforce_1atom_swizzle_v2r8(fix0,fiy0,fiz0,
- f+i_coord_offset,fshift+i_shift_offset);
- /* #define OUTERFLOPS OUTERFLOPS+6 */
- /* #elif GEOMETRY_I == 'Water3' */
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,
- f+i_coord_offset,fshift+i_shift_offset);
- /* #define OUTERFLOPS OUTERFLOPS+18 */
- /* #elif GEOMETRY_I == 'Water4' */
- /* #if 0 in PARTICLES_I */
- gmx_fjsp_update_iforce_4atom_swizzle_v2r8(fix0,fiy0,fiz0,fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset,fshift+i_shift_offset);
- /* #define OUTERFLOPS OUTERFLOPS+24 */
- /* #else */
- gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3,
- f+i_coord_offset+DIM,fshift+i_shift_offset);
- /* #define OUTERFLOPS OUTERFLOPS+18 */
- /* #endif */
- /* #endif */
- /* #endif */
-
- /* #if 'Potential' in KERNEL_VF */
- ggid = gid[iidx];
- /* Update potential energies */
- /* #if KERNEL_ELEC != 'None' */
- gmx_fjsp_update_1pot_v2r8(velecsum,kernel_data->energygrp_elec+ggid);
- /* #define OUTERFLOPS OUTERFLOPS+1 */
- /* #endif */
- /* #if KERNEL_VDW != 'None' */
- gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid);
- /* #define OUTERFLOPS OUTERFLOPS+1 */
- /* #endif */
- /* #endif */
-
- /* Increment number of inner iterations */
- inneriter += j_index_end - j_index_start;
-
- /* Outer loop uses {OUTERFLOPS} flops */
- }
-
- /* Increment number of outer iterations */
- outeriter += nri;
-
- /* Update outer/inner flops */
- /* ## NB: This is not important, it just affects the flopcount. However, since our preprocessor is */
- /* ## primitive and replaces aggressively even in strings inside these directives, we need to */
- /* ## assemble the main part of the name (containing KERNEL/ELEC/VDW) directly in the source. */
- /* #if GEOMETRY_I == 'Water3' */
- /* #define ISUFFIX '_W3' */
- /* #elif GEOMETRY_I == 'Water4' */
- /* #define ISUFFIX '_W4' */
- /* #else */
- /* #define ISUFFIX '' */
- /* #endif */
- /* #if GEOMETRY_J == 'Water3' */
- /* #define JSUFFIX 'W3' */
- /* #elif GEOMETRY_J == 'Water4' */
- /* #define JSUFFIX 'W4' */
- /* #else */
- /* #define JSUFFIX '' */
- /* #endif */
- /* #if 'PotentialAndForce' in KERNEL_VF */
- /* #define VFSUFFIX '_VF' */
- /* #elif 'Potential' in KERNEL_VF */
- /* #define VFSUFFIX '_V' */
- /* #else */
- /* #define VFSUFFIX '_F' */
- /* #endif */
-
- /* #if KERNEL_ELEC != 'None' and KERNEL_VDW != 'None' */
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
- /* #elif KERNEL_ELEC != 'None' */
- inc_nrnb(nrnb,eNR_NBKERNEL_ELEC{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
- /* #else */
- inc_nrnb(nrnb,eNR_NBKERNEL_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
- /* #endif */
-}
#if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
# include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
#endif
-#if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
-# include "gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
-#endif
-
static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
static gmx_bool nonbonded_setup_done = FALSE;
#endif
#if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
-#endif
-#if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
- nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
#endif
; /* empty statement to avoid a completely empty block */
}
#if GMX_SIMD_X86_SSE4_1 && GMX_DOUBLE
/* No padding - see comment above */
{ "sse4_1_double", 1 },
-#endif
-#if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
- /* No padding - see comment above */
- { "sparc64_hpc_ace_double", 1 },
#endif
{ "c", 1 },
};